rf_evenodd.c revision 1.4.6.5 1 1.4.6.3 nathanw /* $NetBSD: rf_evenodd.c,v 1.4.6.5 2002/10/18 02:43:49 nathanw Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Chang-Ming Wu
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*****************************************************************************************
30 1.1 oster *
31 1.1 oster * rf_evenodd.c -- implements EVENODD array architecture
32 1.1 oster *
33 1.1 oster ****************************************************************************************/
34 1.4.6.3 nathanw
35 1.4.6.3 nathanw #include <sys/cdefs.h>
36 1.4.6.3 nathanw __KERNEL_RCSID(0, "$NetBSD: rf_evenodd.c,v 1.4.6.5 2002/10/18 02:43:49 nathanw Exp $");
37 1.1 oster
38 1.1 oster #include "rf_archs.h"
39 1.1 oster
40 1.1 oster #if RF_INCLUDE_EVENODD > 0
41 1.1 oster
42 1.4.6.2 nathanw #include <dev/raidframe/raidframevar.h>
43 1.4.6.2 nathanw
44 1.1 oster #include "rf_raid.h"
45 1.1 oster #include "rf_dag.h"
46 1.1 oster #include "rf_dagffrd.h"
47 1.1 oster #include "rf_dagffwr.h"
48 1.1 oster #include "rf_dagdegrd.h"
49 1.1 oster #include "rf_dagdegwr.h"
50 1.1 oster #include "rf_dagutils.h"
51 1.1 oster #include "rf_dagfuncs.h"
52 1.1 oster #include "rf_etimer.h"
53 1.1 oster #include "rf_general.h"
54 1.1 oster #include "rf_evenodd.h"
55 1.1 oster #include "rf_parityscan.h"
56 1.1 oster #include "rf_utils.h"
57 1.1 oster #include "rf_map.h"
58 1.1 oster #include "rf_pq.h"
59 1.1 oster #include "rf_mcpair.h"
60 1.1 oster #include "rf_evenodd.h"
61 1.1 oster #include "rf_evenodd_dagfuncs.h"
62 1.1 oster #include "rf_evenodd_dags.h"
63 1.1 oster #include "rf_engine.h"
64 1.1 oster
/*
 * Layout-specific configuration state for an EVENODD array.  An instance is
 * allocated by rf_ConfigureEvenOdd and stored in
 * raidPtr->Layout.layoutSpecificInfo.
 */
typedef struct RF_EvenOddConfigInfo_s {
	RF_RowCol_t **stripeIdentifier;	/* numCol x numCol table of column
					 * numbers; filled in at config time
					 * & used by IdentifyStripe, which
					 * returns row (stripeID % numCol) */
}       RF_EvenOddConfigInfo_t;
69 1.2 oster
70 1.2 oster int
71 1.2 oster rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr)
72 1.2 oster RF_ShutdownList_t **listp;
73 1.2 oster RF_Raid_t *raidPtr;
74 1.2 oster RF_Config_t *cfgPtr;
75 1.2 oster {
76 1.2 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
77 1.2 oster RF_EvenOddConfigInfo_t *info;
78 1.2 oster RF_RowCol_t i, j, startdisk;
79 1.2 oster
80 1.2 oster RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList);
81 1.2 oster layoutPtr->layoutSpecificInfo = (void *) info;
82 1.2 oster
83 1.2 oster RF_ASSERT(raidPtr->numRow == 1);
84 1.2 oster
85 1.2 oster info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
86 1.2 oster startdisk = 0;
87 1.2 oster for (i = 0; i < raidPtr->numCol; i++) {
88 1.2 oster for (j = 0; j < raidPtr->numCol; j++) {
89 1.2 oster info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
90 1.2 oster }
91 1.2 oster if ((startdisk -= 2) < 0)
92 1.2 oster startdisk += raidPtr->numCol;
93 1.2 oster }
94 1.1 oster
95 1.2 oster /* fill in the remaining layout parameters */
96 1.2 oster layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
97 1.2 oster layoutPtr->numDataCol = raidPtr->numCol - 2; /* ORIG:
98 1.2 oster * layoutPtr->numDataCol
99 1.2 oster * = raidPtr->numCol-1; */
100 1.1 oster #if RF_EO_MATRIX_DIM > 17
101 1.2 oster if (raidPtr->numCol <= 17) {
102 1.2 oster printf("Number of stripe units in a parity stripe is smaller than 17. Please\n");
103 1.2 oster printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
104 1.2 oster printf("be 17 to increase performance. \n");
105 1.2 oster return (EINVAL);
106 1.2 oster }
107 1.1 oster #elif RF_EO_MATRIX_DIM == 17
108 1.2 oster if (raidPtr->numCol > 17) {
109 1.2 oster printf("Number of stripe units in a parity stripe is bigger than 17. Please\n");
110 1.2 oster printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
111 1.2 oster printf("be 257 for encoding and decoding functions to work. \n");
112 1.2 oster return (EINVAL);
113 1.2 oster }
114 1.1 oster #endif
115 1.2 oster layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
116 1.2 oster layoutPtr->numParityCol = 2;
117 1.2 oster layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
118 1.2 oster raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
119 1.1 oster
120 1.2 oster raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
121 1.1 oster
122 1.2 oster return (0);
123 1.1 oster }
124 1.1 oster
125 1.2 oster int
126 1.2 oster rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr)
127 1.1 oster {
128 1.2 oster return (20);
129 1.1 oster }
130 1.1 oster
131 1.2 oster RF_HeadSepLimit_t
132 1.2 oster rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr)
133 1.1 oster {
134 1.2 oster return (10);
135 1.1 oster }
136 1.1 oster
137 1.2 oster void
138 1.2 oster rf_IdentifyStripeEvenOdd(
139 1.2 oster RF_Raid_t * raidPtr,
140 1.2 oster RF_RaidAddr_t addr,
141 1.2 oster RF_RowCol_t ** diskids,
142 1.2 oster RF_RowCol_t * outRow)
143 1.1 oster {
144 1.2 oster RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
145 1.2 oster RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
146 1.1 oster
147 1.2 oster *outRow = 0;
148 1.2 oster *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
149 1.1 oster }
/*
 * The layout of stripe units on the disks is:
 *
 *	c0   c1   c2   c3   c4
 *
 *	 0    1    2    E    P
 *	 5    E    P    3    4
 *	 P    6    7    8    E
 *	10   11    E    P    9
 *	 E    P   12   13   14
 *	....
 *
 * We use MapSectorRAID5 to map the data stripe units, because that routine
 * can be shown to produce exactly the data layout shown above even though
 * each stripe now carries two units of redundant information.  E and P,
 * however, are mapped by rf_MapEEvenOdd and rf_MapParityEvenOdd, which use
 * a different method from RAID-5.
 */
163 1.1 oster
164 1.1 oster
165 1.2 oster void
166 1.2 oster rf_MapParityEvenOdd(
167 1.2 oster RF_Raid_t * raidPtr,
168 1.2 oster RF_RaidAddr_t raidSector,
169 1.2 oster RF_RowCol_t * row,
170 1.2 oster RF_RowCol_t * col,
171 1.2 oster RF_SectorNum_t * diskSector,
172 1.2 oster int remap)
173 1.1 oster {
174 1.2 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
175 1.2 oster RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1;
176 1.1 oster
177 1.2 oster *row = 0;
178 1.2 oster *col = (endSUIDofthisStrip + 2) % raidPtr->numCol;
179 1.2 oster *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
180 1.2 oster (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
181 1.1 oster }
182 1.1 oster
183 1.2 oster void
184 1.2 oster rf_MapEEvenOdd(
185 1.2 oster RF_Raid_t * raidPtr,
186 1.2 oster RF_RaidAddr_t raidSector,
187 1.2 oster RF_RowCol_t * row,
188 1.2 oster RF_RowCol_t * col,
189 1.2 oster RF_SectorNum_t * diskSector,
190 1.2 oster int remap)
191 1.1 oster {
192 1.2 oster RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
193 1.2 oster RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1;
194 1.1 oster
195 1.2 oster *row = 0;
196 1.2 oster *col = (endSUIDofthisStrip + 1) % raidPtr->numCol;
197 1.2 oster *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
198 1.2 oster (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
199 1.1 oster }
200 1.1 oster
201 1.2 oster void
202 1.2 oster rf_EODagSelect(
203 1.2 oster RF_Raid_t * raidPtr,
204 1.2 oster RF_IoType_t type,
205 1.2 oster RF_AccessStripeMap_t * asmap,
206 1.2 oster RF_VoidFuncPtr * createFunc)
207 1.1 oster {
208 1.2 oster RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
209 1.2 oster unsigned ndfail = asmap->numDataFailed;
210 1.2 oster unsigned npfail = asmap->numParityFailed + asmap->numQFailed;
211 1.2 oster unsigned ntfail = npfail + ndfail;
212 1.2 oster
213 1.2 oster RF_ASSERT(RF_IO_IS_R_OR_W(type));
214 1.2 oster if (ntfail > 2) {
215 1.2 oster RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
216 1.2 oster /* *infoFunc = */ *createFunc = NULL;
217 1.2 oster return;
218 1.2 oster }
219 1.2 oster /* ok, we can do this I/O */
220 1.2 oster if (type == RF_IO_TYPE_READ) {
221 1.2 oster switch (ndfail) {
222 1.2 oster case 0:
223 1.2 oster /* fault free read */
224 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
225 1.2 oster break;
226 1.2 oster case 1:
227 1.2 oster /* lost a single data unit */
228 1.2 oster /* two cases: (1) parity is not lost. do a normal raid
229 1.2 oster * 5 reconstruct read. (2) parity is lost. do a
230 1.2 oster * reconstruct read using "e". */
231 1.2 oster if (ntfail == 2) { /* also lost redundancy */
232 1.2 oster if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
233 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG;
234 1.2 oster else
235 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG;
236 1.2 oster } else {
237 1.2 oster /* P and E are ok. But is there a failure in
238 1.2 oster * some unaccessed data unit? */
239 1.2 oster if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
240 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG;
241 1.2 oster else
242 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG;
243 1.2 oster }
244 1.2 oster break;
245 1.2 oster case 2:
246 1.2 oster /* *createFunc = rf_EO_200_CreateReadDAG; */
247 1.2 oster *createFunc = NULL;
248 1.2 oster break;
249 1.2 oster }
250 1.2 oster return;
251 1.2 oster }
252 1.2 oster /* a write */
253 1.2 oster switch (ntfail) {
254 1.2 oster case 0: /* fault free */
255 1.2 oster if (rf_suppressLocksAndLargeWrites ||
256 1.2 oster (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
257 1.2 oster (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
258 1.2 oster
259 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EOCreateSmallWriteDAG;
260 1.2 oster } else {
261 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EOCreateLargeWriteDAG;
262 1.2 oster }
263 1.2 oster break;
264 1.2 oster
265 1.2 oster case 1: /* single disk fault */
266 1.2 oster if (npfail == 1) {
267 1.2 oster RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
268 1.2 oster if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
269 1.2 oster * normal mode raid5
270 1.2 oster * write. */
271 1.2 oster if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
272 1.2 oster || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
273 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG;
274 1.2 oster else
275 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG;
276 1.2 oster } else {/* parity died, small write only updating Q */
277 1.2 oster if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
278 1.2 oster || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
279 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG;
280 1.2 oster else
281 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG;
282 1.2 oster }
283 1.2 oster } else { /* data missing. Do a P reconstruct write if
284 1.2 oster * only a single data unit is lost in the
285 1.2 oster * stripe, otherwise a reconstruct write which
286 1.2 oster * employnig both P and E units. */
287 1.2 oster if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) {
288 1.2 oster if (asmap->numStripeUnitsAccessed == 1)
289 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG;
290 1.2 oster else
291 1.2 oster *createFunc = NULL; /* No direct support for
292 1.2 oster * this case now, like
293 1.2 oster * that in Raid-5 */
294 1.2 oster } else {
295 1.2 oster if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
296 1.2 oster *createFunc = NULL; /* No direct support for
297 1.2 oster * this case now, like
298 1.2 oster * that in Raid-5 */
299 1.2 oster else
300 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG;
301 1.2 oster }
302 1.2 oster }
303 1.2 oster break;
304 1.2 oster
305 1.2 oster case 2: /* two disk faults */
306 1.2 oster switch (npfail) {
307 1.2 oster case 2: /* both p and q dead */
308 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG;
309 1.2 oster break;
310 1.2 oster case 1: /* either p or q and dead data */
311 1.2 oster RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
312 1.2 oster RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
313 1.2 oster if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) {
314 1.2 oster if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
315 1.2 oster *createFunc = NULL; /* In both PQ and
316 1.2 oster * EvenOdd, no direct
317 1.2 oster * support for this case
318 1.2 oster * now, like that in
319 1.2 oster * Raid-5 */
320 1.2 oster else
321 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG;
322 1.2 oster } else {
323 1.2 oster if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
324 1.2 oster *createFunc = NULL; /* No direct support for
325 1.2 oster * this case, like that
326 1.2 oster * in Raid-5 */
327 1.2 oster else
328 1.2 oster *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG;
329 1.2 oster }
330 1.2 oster break;
331 1.2 oster case 0: /* double data loss */
332 1.2 oster /* if(asmap->failedPDAs[0]->numSector +
333 1.2 oster * asmap->failedPDAs[1]->numSector == 2 *
334 1.2 oster * layoutPtr->sectorsPerStripeUnit ) createFunc =
335 1.2 oster * rf_EOCreateLargeWriteDAG; else */
336 1.2 oster *createFunc = NULL; /* currently, in Evenodd, No
337 1.2 oster * support for simultaneous
338 1.2 oster * access of both failed SUs */
339 1.2 oster break;
340 1.2 oster }
341 1.2 oster break;
342 1.2 oster
343 1.2 oster default: /* more than 2 disk faults */
344 1.2 oster *createFunc = NULL;
345 1.2 oster RF_PANIC();
346 1.2 oster }
347 1.2 oster return;
348 1.1 oster }
349 1.1 oster
350 1.1 oster
351 1.2 oster int
352 1.2 oster rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags)
353 1.2 oster RF_Raid_t *raidPtr;
354 1.2 oster RF_RaidAddr_t raidAddr;
355 1.2 oster RF_PhysDiskAddr_t *parityPDA;
356 1.2 oster int correct_it;
357 1.2 oster RF_RaidAccessFlags_t flags;
358 1.1 oster {
359 1.2 oster RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
360 1.2 oster RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
361 1.2 oster RF_SectorCount_t numsector = parityPDA->numSector;
362 1.2 oster int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
363 1.2 oster int bytesPerStripe = numbytes * layoutPtr->numDataCol;
364 1.2 oster RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */
365 1.2 oster RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
366 1.2 oster RF_AccessStripeMapHeader_t *asm_h;
367 1.2 oster RF_AccessStripeMap_t *asmap;
368 1.2 oster RF_AllocListElem_t *alloclist;
369 1.2 oster RF_PhysDiskAddr_t *pda;
370 1.2 oster char *pbuf, *buf, *end_p, *p;
371 1.2 oster char *redundantbuf2;
372 1.2 oster int redundantTwoErr = 0, redundantOneErr = 0;
373 1.2 oster int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE,
374 1.2 oster parity_corrected = RF_FALSE, red2_corrected = RF_FALSE;
375 1.2 oster int i, retcode;
376 1.2 oster RF_ReconUnitNum_t which_ru;
377 1.2 oster RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
378 1.2 oster int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
379 1.2 oster RF_AccTraceEntry_t tracerec;
380 1.2 oster RF_MCPair_t *mcpair;
381 1.2 oster
382 1.2 oster retcode = RF_PARITY_OKAY;
383 1.2 oster
384 1.2 oster mcpair = rf_AllocMCPair();
385 1.2 oster rf_MakeAllocList(alloclist);
386 1.2 oster RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
387 1.2 oster RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make
388 1.2 oster * sure buffer is zeroed */
389 1.2 oster end_p = buf + bytesPerStripe;
390 1.2 oster RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make
391 1.2 oster * sure buffer is zeroed */
392 1.2 oster
393 1.2 oster rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
394 1.2 oster "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
395 1.2 oster blockNode = rd_dag_h->succedents[0];
396 1.2 oster unblockNode = blockNode->succedents[0]->succedents[0];
397 1.2 oster
398 1.2 oster /* map the stripe and fill in the PDAs in the dag */
399 1.2 oster asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
400 1.2 oster asmap = asm_h->stripeMap;
401 1.2 oster
402 1.2 oster for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
403 1.2 oster RF_ASSERT(pda);
404 1.2 oster rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
405 1.2 oster RF_ASSERT(pda->numSector != 0);
406 1.2 oster if (rf_TryToRedirectPDA(raidPtr, pda, 0))
407 1.2 oster goto out; /* no way to verify parity if disk is
408 1.2 oster * dead. return w/ good status */
409 1.2 oster blockNode->succedents[i]->params[0].p = pda;
410 1.2 oster blockNode->succedents[i]->params[2].v = psID;
411 1.2 oster blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
412 1.2 oster }
413 1.2 oster
414 1.2 oster RF_ASSERT(!asmap->parityInfo->next);
415 1.2 oster rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
416 1.2 oster RF_ASSERT(asmap->parityInfo->numSector != 0);
417 1.2 oster if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
418 1.2 oster goto out;
419 1.2 oster blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;
420 1.2 oster
421 1.2 oster RF_ASSERT(!asmap->qInfo->next);
422 1.2 oster rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1);
423 1.2 oster RF_ASSERT(asmap->qInfo->numSector != 0);
424 1.2 oster if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1))
425 1.2 oster goto out;
426 1.2 oster /* if disk is dead, b/c no reconstruction is implemented right now,
427 1.2 oster * the function "rf_TryToRedirectPDA" always return one, which cause
428 1.2 oster * go to out and return w/ good status */
429 1.2 oster blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo;
430 1.2 oster
431 1.2 oster /* fire off the DAG */
432 1.4.6.1 nathanw memset((char *) &tracerec, 0, sizeof(tracerec));
433 1.2 oster rd_dag_h->tracerec = &tracerec;
434 1.2 oster
435 1.4.6.4 nathanw #if RF_DEBUG_VALIDATE_DAG
436 1.2 oster if (rf_verifyParityDebug) {
437 1.2 oster printf("Parity verify read dag:\n");
438 1.2 oster rf_PrintDAGList(rd_dag_h);
439 1.2 oster }
440 1.4.6.4 nathanw #endif
441 1.2 oster RF_LOCK_MUTEX(mcpair->mutex);
442 1.2 oster mcpair->flag = 0;
443 1.2 oster rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
444 1.2 oster (void *) mcpair);
445 1.2 oster while (!mcpair->flag)
446 1.2 oster RF_WAIT_COND(mcpair->cond, mcpair->mutex);
447 1.2 oster RF_UNLOCK_MUTEX(mcpair->mutex);
448 1.2 oster if (rd_dag_h->status != rf_enable) {
449 1.2 oster RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
450 1.2 oster retcode = RF_PARITY_COULD_NOT_VERIFY;
451 1.2 oster goto out;
452 1.2 oster }
453 1.2 oster for (p = buf, i = 0; p < end_p; p += numbytes, i++) {
454 1.2 oster rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector);
455 1.2 oster /* the corresponding columes in EvenOdd encoding Matrix for
456 1.2 oster * these p pointers which point to the databuffer in a full
457 1.2 oster * stripe are sequentially from 0 to layoutPtr->numDataCol-1 */
458 1.2 oster rf_bxor(p, pbuf, numbytes, NULL);
459 1.2 oster }
460 1.2 oster RF_ASSERT(i == layoutPtr->numDataCol);
461 1.2 oster
462 1.2 oster for (i = 0; i < numbytes; i++) {
463 1.2 oster if (pbuf[i] != buf[bytesPerStripe + i]) {
464 1.2 oster if (!correct_it) {
465 1.2 oster RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
466 1.2 oster i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]);
467 1.2 oster }
468 1.2 oster }
469 1.2 oster redundantOneErr = 1;
470 1.2 oster break;
471 1.2 oster }
472 1.2 oster
473 1.2 oster for (i = 0; i < numbytes; i++) {
474 1.2 oster if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) {
475 1.2 oster if (!correct_it) {
476 1.2 oster RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n",
477 1.2 oster i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]);
478 1.2 oster }
479 1.2 oster redundantTwoErr = 1;
480 1.2 oster break;
481 1.2 oster }
482 1.2 oster }
483 1.2 oster if (redundantOneErr || redundantTwoErr)
484 1.2 oster retcode = RF_PARITY_BAD;
485 1.2 oster
486 1.2 oster /* correct the first redundant disk, ie parity if it is error */
487 1.2 oster if (redundantOneErr && correct_it) {
488 1.2 oster wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
489 1.2 oster "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
490 1.2 oster wrBlock = wr_dag_h->succedents[0];
491 1.2 oster wrUnblock = wrBlock->succedents[0]->succedents[0];
492 1.2 oster wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
493 1.2 oster wrBlock->succedents[0]->params[2].v = psID;
494 1.2 oster wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
495 1.4.6.1 nathanw memset((char *) &tracerec, 0, sizeof(tracerec));
496 1.2 oster wr_dag_h->tracerec = &tracerec;
497 1.4.6.4 nathanw #if RF_DEBUG_VALIDATE_DAG
498 1.2 oster if (rf_verifyParityDebug) {
499 1.2 oster printf("Parity verify write dag:\n");
500 1.2 oster rf_PrintDAGList(wr_dag_h);
501 1.2 oster }
502 1.4.6.4 nathanw #endif
503 1.2 oster RF_LOCK_MUTEX(mcpair->mutex);
504 1.2 oster mcpair->flag = 0;
505 1.2 oster rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
506 1.2 oster (void *) mcpair);
507 1.2 oster while (!mcpair->flag)
508 1.2 oster RF_WAIT_COND(mcpair->cond, mcpair->mutex);
509 1.2 oster RF_UNLOCK_MUTEX(mcpair->mutex);
510 1.2 oster if (wr_dag_h->status != rf_enable) {
511 1.2 oster RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
512 1.2 oster parity_cant_correct = RF_TRUE;
513 1.2 oster } else {
514 1.2 oster parity_corrected = RF_TRUE;
515 1.2 oster }
516 1.2 oster rf_FreeDAG(wr_dag_h);
517 1.2 oster }
518 1.2 oster if (redundantTwoErr && correct_it) {
519 1.2 oster wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
520 1.2 oster "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY);
521 1.2 oster wrBlock = wr_dag_h->succedents[0];
522 1.2 oster wrUnblock = wrBlock->succedents[0]->succedents[0];
523 1.2 oster wrBlock->succedents[0]->params[0].p = asmap->qInfo;
524 1.2 oster wrBlock->succedents[0]->params[2].v = psID;
525 1.2 oster wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
526 1.4.6.1 nathanw memset((char *) &tracerec, 0, sizeof(tracerec));
527 1.2 oster wr_dag_h->tracerec = &tracerec;
528 1.4.6.4 nathanw #if RF_DEBUG_VALIDATE_DAG
529 1.2 oster if (rf_verifyParityDebug) {
530 1.2 oster printf("Dag of write new second redundant information in parity verify :\n");
531 1.2 oster rf_PrintDAGList(wr_dag_h);
532 1.2 oster }
533 1.4.6.4 nathanw #endif
534 1.2 oster RF_LOCK_MUTEX(mcpair->mutex);
535 1.2 oster mcpair->flag = 0;
536 1.2 oster rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
537 1.2 oster (void *) mcpair);
538 1.2 oster while (!mcpair->flag)
539 1.2 oster RF_WAIT_COND(mcpair->cond, mcpair->mutex);
540 1.2 oster RF_UNLOCK_MUTEX(mcpair->mutex);
541 1.2 oster if (wr_dag_h->status != rf_enable) {
542 1.2 oster RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n");
543 1.2 oster red2_cant_correct = RF_TRUE;
544 1.2 oster } else {
545 1.2 oster red2_corrected = RF_TRUE;
546 1.2 oster }
547 1.2 oster rf_FreeDAG(wr_dag_h);
548 1.2 oster }
549 1.2 oster if ((redundantOneErr && parity_cant_correct) ||
550 1.2 oster (redundantTwoErr && red2_cant_correct))
551 1.2 oster retcode = RF_PARITY_COULD_NOT_CORRECT;
552 1.2 oster if ((retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected)
553 1.2 oster retcode = RF_PARITY_CORRECTED;
554 1.1 oster
555 1.1 oster
556 1.1 oster out:
557 1.2 oster rf_FreeAccessStripeMap(asm_h);
558 1.2 oster rf_FreeAllocList(alloclist);
559 1.2 oster rf_FreeDAG(rd_dag_h);
560 1.2 oster rf_FreeMCPair(mcpair);
561 1.2 oster return (retcode);
562 1.1 oster }
563 1.2 oster #endif /* RF_INCLUDE_EVENODD > 0 */
564