rf_parityscan.c revision 1.3 1 /* $NetBSD: rf_parityscan.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*****************************************************************************
30 *
31 * rf_parityscan.c -- misc utilities related to parity verification
32 *
33 *****************************************************************************/
34
35 #include "rf_types.h"
36 #include "rf_raid.h"
37 #include "rf_dag.h"
38 #include "rf_dagfuncs.h"
39 #include "rf_dagutils.h"
40 #include "rf_mcpair.h"
41 #include "rf_general.h"
42 #include "rf_engine.h"
43 #include "rf_parityscan.h"
44 #include "rf_map.h"
45 #include "rf_sys.h"
46
47 /*****************************************************************************************
48 *
49 * walk through the entire arry and write new parity.
50 * This works by creating two DAGs, one to read a stripe of data and one to
51 * write new parity. The first is executed, the data is xored together, and
52 * then the second is executed. To avoid constantly building and tearing down
53 * the DAGs, we create them a priori and fill them in with the mapping
54 * information as we go along.
55 *
56 * there should never be more than one thread running this.
57 *
58 ****************************************************************************************/
59
60 int
61 rf_RewriteParity(raidPtr)
62 RF_Raid_t *raidPtr;
63 {
64 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
65 RF_AccessStripeMapHeader_t *asm_h;
66 int old_pctg, new_pctg, rc;
67 RF_PhysDiskAddr_t pda;
68 RF_SectorNum_t i;
69
70 pda.startSector = 0;
71 pda.numSector = raidPtr->Layout.sectorsPerStripeUnit;
72 old_pctg = -1;
73
74 /* rf_verifyParityDebug=1; */
75 for (i = 0; i < raidPtr->totalSectors; i += layoutPtr->dataSectorsPerStripe) {
76 asm_h = rf_MapAccess(raidPtr, i, layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP);
77 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0);
78 /* printf("Parity verified: rc=%d\n",rc); */
79 switch (rc) {
80 case RF_PARITY_OKAY:
81 case RF_PARITY_CORRECTED:
82 break;
83 case RF_PARITY_BAD:
84 printf("Parity bad during correction\n");
85 RF_PANIC();
86 break;
87 case RF_PARITY_COULD_NOT_CORRECT:
88 printf("Could not correct bad parity\n");
89 RF_PANIC();
90 break;
91 case RF_PARITY_COULD_NOT_VERIFY:
92 printf("Could not verify parity\n");
93 RF_PANIC();
94 break;
95 default:
96 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc);
97 RF_PANIC();
98 }
99 rf_FreeAccessStripeMap(asm_h);
100 new_pctg = i * 1000 / raidPtr->totalSectors;
101 if (new_pctg != old_pctg) {
102 }
103 old_pctg = new_pctg;
104 }
105 #if 1
106 return (0); /* XXX nothing was here.. GO */
107 #endif
108 }
109 /*****************************************************************************************
110 *
111 * verify that the parity in a particular stripe is correct.
112 * we validate only the range of parity defined by parityPDA, since
113 * this is all we have locked. The way we do this is to create an asm
114 * that maps the whole stripe and then range-restrict it to the parity
115 * region defined by the parityPDA.
116 *
117 ****************************************************************************************/
118 int
119 rf_VerifyParity(raidPtr, aasm, correct_it, flags)
120 RF_Raid_t *raidPtr;
121 RF_AccessStripeMap_t *aasm;
122 int correct_it;
123 RF_RaidAccessFlags_t flags;
124 {
125 RF_PhysDiskAddr_t *parityPDA;
126 RF_AccessStripeMap_t *doasm;
127 RF_LayoutSW_t *lp;
128 int lrc, rc;
129
130 lp = raidPtr->Layout.map;
131 if (lp->faultsTolerated == 0) {
132 /*
133 * There isn't any parity. Call it "okay."
134 */
135 return (RF_PARITY_OKAY);
136 }
137 rc = RF_PARITY_OKAY;
138 if (lp->VerifyParity) {
139 for (doasm = aasm; doasm; doasm = doasm->next) {
140 for (parityPDA = doasm->parityInfo; parityPDA; parityPDA = parityPDA->next) {
141 lrc = lp->VerifyParity(raidPtr, doasm->raidAddress, parityPDA,
142 correct_it, flags);
143 if (lrc > rc) {
144 /* see rf_parityscan.h for why this
145 * works */
146 rc = lrc;
147 }
148 }
149 }
150 } else {
151 rc = RF_PARITY_COULD_NOT_VERIFY;
152 }
153 return (rc);
154 }
155
156 int
157 rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags)
158 RF_Raid_t *raidPtr;
159 RF_RaidAddr_t raidAddr;
160 RF_PhysDiskAddr_t *parityPDA;
161 int correct_it;
162 RF_RaidAccessFlags_t flags;
163 {
164 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
165 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
166 RF_SectorCount_t numsector = parityPDA->numSector;
167 int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
168 int bytesPerStripe = numbytes * layoutPtr->numDataCol;
169 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */
170 RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
171 RF_AccessStripeMapHeader_t *asm_h;
172 RF_AccessStripeMap_t *asmap;
173 RF_AllocListElem_t *alloclist;
174 RF_PhysDiskAddr_t *pda;
175 char *pbuf, *buf, *end_p, *p;
176 int i, retcode;
177 RF_ReconUnitNum_t which_ru;
178 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
179 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
180 RF_AccTraceEntry_t tracerec;
181 RF_MCPair_t *mcpair;
182
183 retcode = RF_PARITY_OKAY;
184
185 mcpair = rf_AllocMCPair();
186 rf_MakeAllocList(alloclist);
187 RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
188 RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make
189 * sure buffer is zeroed */
190 end_p = buf + bytesPerStripe;
191
192 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
193 "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
194 blockNode = rd_dag_h->succedents[0];
195 unblockNode = blockNode->succedents[0]->succedents[0];
196
197 /* map the stripe and fill in the PDAs in the dag */
198 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
199 asmap = asm_h->stripeMap;
200
201 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
202 RF_ASSERT(pda);
203 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
204 RF_ASSERT(pda->numSector != 0);
205 if (rf_TryToRedirectPDA(raidPtr, pda, 0))
206 goto out; /* no way to verify parity if disk is
207 * dead. return w/ good status */
208 blockNode->succedents[i]->params[0].p = pda;
209 blockNode->succedents[i]->params[2].v = psID;
210 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
211 }
212
213 RF_ASSERT(!asmap->parityInfo->next);
214 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
215 RF_ASSERT(asmap->parityInfo->numSector != 0);
216 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
217 goto out;
218 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;
219
220 /* fire off the DAG */
221 bzero((char *) &tracerec, sizeof(tracerec));
222 rd_dag_h->tracerec = &tracerec;
223
224 if (rf_verifyParityDebug) {
225 printf("Parity verify read dag:\n");
226 rf_PrintDAGList(rd_dag_h);
227 }
228 RF_LOCK_MUTEX(mcpair->mutex);
229 mcpair->flag = 0;
230 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
231 (void *) mcpair);
232 while (!mcpair->flag)
233 RF_WAIT_COND(mcpair->cond, mcpair->mutex);
234 RF_UNLOCK_MUTEX(mcpair->mutex);
235 if (rd_dag_h->status != rf_enable) {
236 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
237 retcode = RF_PARITY_COULD_NOT_VERIFY;
238 goto out;
239 }
240 for (p = buf; p < end_p; p += numbytes) {
241 rf_bxor(p, pbuf, numbytes, NULL);
242 }
243 for (i = 0; i < numbytes; i++) {
244 #if 0
245 if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) {
246 printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]);
247 }
248 #endif
249 if (pbuf[i] != buf[bytesPerStripe + i]) {
250 if (!correct_it)
251 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
252 i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]);
253 retcode = RF_PARITY_BAD;
254 break;
255 }
256 }
257
258 if (retcode && correct_it) {
259 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
260 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
261 wrBlock = wr_dag_h->succedents[0];
262 wrUnblock = wrBlock->succedents[0]->succedents[0];
263 wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
264 wrBlock->succedents[0]->params[2].v = psID;
265 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
266 bzero((char *) &tracerec, sizeof(tracerec));
267 wr_dag_h->tracerec = &tracerec;
268 if (rf_verifyParityDebug) {
269 printf("Parity verify write dag:\n");
270 rf_PrintDAGList(wr_dag_h);
271 }
272 RF_LOCK_MUTEX(mcpair->mutex);
273 mcpair->flag = 0;
274 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
275 (void *) mcpair);
276 while (!mcpair->flag)
277 RF_WAIT_COND(mcpair->cond, mcpair->mutex);
278 RF_UNLOCK_MUTEX(mcpair->mutex);
279 if (wr_dag_h->status != rf_enable) {
280 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
281 retcode = RF_PARITY_COULD_NOT_CORRECT;
282 }
283 rf_FreeDAG(wr_dag_h);
284 if (retcode == RF_PARITY_BAD)
285 retcode = RF_PARITY_CORRECTED;
286 }
287 out:
288 rf_FreeAccessStripeMap(asm_h);
289 rf_FreeAllocList(alloclist);
290 rf_FreeDAG(rd_dag_h);
291 rf_FreeMCPair(mcpair);
292 return (retcode);
293 }
294
295 int
296 rf_TryToRedirectPDA(raidPtr, pda, parity)
297 RF_Raid_t *raidPtr;
298 RF_PhysDiskAddr_t *pda;
299 int parity;
300 {
301 if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) {
302 if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) {
303 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
304 RF_RowCol_t or = pda->row, oc = pda->col;
305 RF_SectorNum_t os = pda->startSector;
306 if (parity) {
307 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
308 if (rf_verifyParityDebug)
309 printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n",
310 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector);
311 } else {
312 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
313 if (rf_verifyParityDebug)
314 printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n",
315 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector);
316 }
317 } else {
318 RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow;
319 RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol;
320 pda->row = spRow;
321 pda->col = spCol;
322 }
323 }
324 }
325 if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status))
326 return (1);
327 return (0);
328 }
329 /*****************************************************************************************
330 *
331 * currently a stub.
332 *
333 * takes as input an ASM describing a write operation and containing one failure, and
334 * verifies that the parity was correctly updated to reflect the write.
335 *
336 * if it's a data unit that's failed, we read the other data units in the stripe and
337 * the parity unit, XOR them together, and verify that we get the data intended for
338 * the failed disk. Since it's easy, we also validate that the right data got written
339 * to the surviving data disks.
340 *
341 * If it's the parity that failed, there's really no validation we can do except the
342 * above verification that the right data got written to all disks. This is because
343 * the new data intended for the failed disk is supplied in the ASM, but this is of
344 * course not the case for the new parity.
345 *
346 ****************************************************************************************/
347 int
348 rf_VerifyDegrModeWrite(raidPtr, asmh)
349 RF_Raid_t *raidPtr;
350 RF_AccessStripeMapHeader_t *asmh;
351 {
352 return (0);
353 }
354 /* creates a simple DAG with a header, a block-recon node at level 1,
355 * nNodes nodes at level 2, an unblock-recon node at level 3, and
356 * a terminator node at level 4. The stripe address field in
357 * the block and unblock nodes are not touched, nor are the pda
358 * fields in the second-level nodes, so they must be filled in later.
359 *
360 * commit point is established at unblock node - this means that any
361 * failure during dag execution causes the dag to fail
362 */
363 RF_DagHeader_t *
364 rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority)
365 RF_Raid_t *raidPtr;
366 int nNodes;
367 int bytesPerSU;
368 char *databuf;
369 int (*doFunc) (RF_DagNode_t * node);
370 int (*undoFunc) (RF_DagNode_t * node);
371 char *name; /* node names at the second level */
372 RF_AllocListElem_t *alloclist;
373 RF_RaidAccessFlags_t flags;
374 int priority;
375 {
376 RF_DagHeader_t *dag_h;
377 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode;
378 int i;
379
380 /* create the nodes, the block & unblock nodes, and the terminator
381 * node */
382 RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist);
383 blockNode = &nodes[nNodes];
384 unblockNode = blockNode + 1;
385 termNode = unblockNode + 1;
386
387 dag_h = rf_AllocDAGHeader();
388 dag_h->raidPtr = (void *) raidPtr;
389 dag_h->allocList = NULL;/* we won't use this alloc list */
390 dag_h->status = rf_enable;
391 dag_h->numSuccedents = 1;
392 dag_h->creator = "SimpleDAG";
393
394 /* this dag can not commit until the unblock node is reached errors
395 * prior to the commit point imply the dag has failed */
396 dag_h->numCommitNodes = 1;
397 dag_h->numCommits = 0;
398
399 dag_h->succedents[0] = blockNode;
400 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist);
401 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist);
402 unblockNode->succedents[0] = termNode;
403 for (i = 0; i < nNodes; i++) {
404 blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i];
405 unblockNode->antType[i] = rf_control;
406 rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist);
407 nodes[i].succedents[0] = unblockNode;
408 nodes[i].antecedents[0] = blockNode;
409 nodes[i].antType[0] = rf_control;
410 nodes[i].params[1].p = (databuf + (i * bytesPerSU));
411 }
412 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist);
413 termNode->antecedents[0] = unblockNode;
414 termNode->antType[0] = rf_control;
415 return (dag_h);
416 }
417