rf_evenodd_dagfuncs.c revision 1.18.46.1 1 1.18.46.1 haad /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.18.46.1 2008/12/13 01:14:48 haad Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: ChangMing Wu
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * Code for RAID-EVENODD architecture.
31 1.1 oster */
32 1.11 lukem
33 1.11 lukem #include <sys/cdefs.h>
34 1.18.46.1 haad __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.18.46.1 2008/12/13 01:14:48 haad Exp $");
35 1.1 oster
36 1.7 oster #include "rf_archs.h"
37 1.18.46.1 haad
38 1.18.46.1 haad #ifdef _KERNEL_OPT
39 1.12 martin #include "opt_raid_diagnostic.h"
40 1.18.46.1 haad #endif
41 1.7 oster
42 1.7 oster #if RF_INCLUDE_EVENODD > 0
43 1.7 oster
44 1.10 oster #include <dev/raidframe/raidframevar.h>
45 1.10 oster
46 1.1 oster #include "rf_raid.h"
47 1.1 oster #include "rf_dag.h"
48 1.1 oster #include "rf_dagffrd.h"
49 1.1 oster #include "rf_dagffwr.h"
50 1.1 oster #include "rf_dagdegrd.h"
51 1.1 oster #include "rf_dagdegwr.h"
52 1.1 oster #include "rf_dagutils.h"
53 1.1 oster #include "rf_dagfuncs.h"
54 1.1 oster #include "rf_etimer.h"
55 1.1 oster #include "rf_general.h"
56 1.1 oster #include "rf_parityscan.h"
57 1.1 oster #include "rf_evenodd.h"
58 1.1 oster #include "rf_evenodd_dagfuncs.h"
59 1.1 oster
60 1.1 oster /* These redundant functions are for small write */
61 1.2 oster RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
62 1.2 oster RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
63 1.1 oster /* These redundant functions are for degraded read */
64 1.2 oster RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
65 1.2 oster RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
66 1.1 oster /**********************************************************************************************
67 1.2 oster * the following encoding node functions is used in EO_000_CreateLargeWriteDAG
68 1.1 oster **********************************************************************************************/
69 1.14 perry int
70 1.2 oster rf_RegularPEFunc(node)
71 1.2 oster RF_DagNode_t *node;
72 1.1 oster {
73 1.2 oster rf_RegularESubroutine(node, node->results[1]);
74 1.2 oster rf_RegularXorFunc(node);/* does the wakeup here! */
75 1.1 oster #if 1
76 1.2 oster return (0); /* XXX This was missing... GO */
77 1.1 oster #endif
78 1.1 oster }
79 1.1 oster
80 1.1 oster
81 1.1 oster /************************************************************************************************
82 1.1 oster * For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
83 1.1 oster * be used. The former is used when the write accesses at least a full stripe unit's worth of sectors.
84 1.1 oster * The latter is used when the write accesses two stripe units but with fewer total sectors
85 1.1 oster * than the sectors per SU. In that case, the accessed parts of parity and 'E' are disconnected
86 1.1 oster * areas in their stripe units, and the parity write and the 'E' write are each divided into two distinct
87 1.1 oster * writes (four in total). The simple old-new write and the regular old-new write happen as in RAID-5.
88 1.1 oster ************************************************************************************************/
89 1.1 oster
90 1.2 oster /* Algorithm:
91 1.1 oster 1. Store the difference of old data and new data in the Rod buffer.
92 1.2 oster 2. then encode this buffer into the buffer which already have old 'E' information inside it,
93 1.1 oster the result can be shown to be the new 'E' information.
94 1.1 oster 3. xor the Wnd buffer into the difference buffer to recover the original old data.
95 1.2 oster Here we have another alternative: to allocate a temporary buffer for storing the difference of
96 1.2 oster old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
97 1.1 oster take the same speed as the previous, and need more memory.
98 1.1 oster */
99 1.14 perry int
100 1.2 oster rf_RegularONEFunc(node)
101 1.2 oster RF_DagNode_t *node;
102 1.2 oster {
103 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
104 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
105 1.2 oster int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node
106 1.2 oster * where you can find
107 1.2 oster * e-pda */
108 1.2 oster int i, k, retcode = 0;
109 1.2 oster int suoffset, length;
110 1.2 oster RF_RowCol_t scol;
111 1.2 oster char *srcbuf, *destbuf;
112 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
113 1.2 oster RF_Etimer_t timer;
114 1.9 thorpej RF_PhysDiskAddr_t *pda;
115 1.9 thorpej #ifdef RAID_DIAGNOSTIC
116 1.9 thorpej RF_PhysDiskAddr_t *EPDA =
117 1.9 thorpej (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
118 1.9 thorpej int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
119 1.9 thorpej #endif /* RAID_DIAGNOSTIC */
120 1.2 oster
121 1.2 oster RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
122 1.2 oster RF_ASSERT(ESUOffset == 0);
123 1.2 oster
124 1.2 oster RF_ETIMER_START(timer);
125 1.2 oster
126 1.2 oster /* Xor the Wnd buffer into Rod buffer, the difference of old data and
127 1.2 oster * new data is stored in Rod buffer */
128 1.2 oster for (k = 0; k < EpdaIndex; k += 2) {
129 1.2 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
130 1.16 oster retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
131 1.2 oster }
132 1.2 oster /* Start to encoding the buffer storing the difference of old data and
133 1.2 oster * new data into 'E' buffer */
134 1.2 oster for (i = 0; i < EpdaIndex; i += 2)
135 1.2 oster if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr
136 1.2 oster * of E */
137 1.2 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
138 1.2 oster srcbuf = (char *) node->params[i + 1].p;
139 1.2 oster scol = rf_EUCol(layoutPtr, pda->raidAddress);
140 1.2 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
141 1.2 oster destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
142 1.2 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
143 1.2 oster }
144 1.2 oster /* Recover the original old data to be used by parity encoding
145 1.2 oster * function in XorNode */
146 1.2 oster for (k = 0; k < EpdaIndex; k += 2) {
147 1.2 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
148 1.16 oster retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
149 1.2 oster }
150 1.2 oster RF_ETIMER_STOP(timer);
151 1.2 oster RF_ETIMER_EVAL(timer);
152 1.2 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
153 1.2 oster rf_GenericWakeupFunc(node, 0);
154 1.1 oster #if 1
155 1.2 oster return (0); /* XXX this was missing.. GO */
156 1.1 oster #endif
157 1.1 oster }
158 1.1 oster
159 1.14 perry int
160 1.2 oster rf_SimpleONEFunc(node)
161 1.2 oster RF_DagNode_t *node;
162 1.2 oster {
163 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
164 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
165 1.2 oster RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
166 1.2 oster int retcode = 0;
167 1.2 oster char *srcbuf, *destbuf;
168 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
169 1.2 oster int length;
170 1.2 oster RF_RowCol_t scol;
171 1.2 oster RF_Etimer_t timer;
172 1.2 oster
173 1.2 oster RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
174 1.2 oster if (node->dagHdr->status == rf_enable) {
175 1.2 oster RF_ETIMER_START(timer);
176 1.2 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of
177 1.2 oster * writeDataNodes */
178 1.2 oster /* bxor to buffer of readDataNodes */
179 1.16 oster retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
180 1.2 oster /* find out the corresponding colume in encoding matrix for
181 1.2 oster * write colume to be encoded into redundant disk 'E' */
182 1.2 oster scol = rf_EUCol(layoutPtr, pda->raidAddress);
183 1.2 oster srcbuf = node->params[1].p;
184 1.2 oster destbuf = node->params[3].p;
185 1.2 oster /* Start encoding process */
186 1.2 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
187 1.16 oster rf_bxor(node->params[5].p, node->params[1].p, length);
188 1.2 oster RF_ETIMER_STOP(timer);
189 1.2 oster RF_ETIMER_EVAL(timer);
190 1.2 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
191 1.2 oster
192 1.2 oster }
193 1.2 oster return (rf_GenericWakeupFunc(node, retcode)); /* call wake func
194 1.2 oster * explicitly since no
195 1.2 oster * I/O in this node */
196 1.1 oster }
197 1.1 oster
198 1.1 oster
199 1.1 oster /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/
200 1.14 perry void
201 1.2 oster rf_RegularESubroutine(node, ebuf)
202 1.2 oster RF_DagNode_t *node;
203 1.2 oster char *ebuf;
204 1.2 oster {
205 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
206 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
207 1.2 oster RF_PhysDiskAddr_t *pda;
208 1.2 oster int i, suoffset;
209 1.2 oster RF_RowCol_t scol;
210 1.2 oster char *srcbuf, *destbuf;
211 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
212 1.2 oster RF_Etimer_t timer;
213 1.2 oster
214 1.2 oster RF_ETIMER_START(timer);
215 1.2 oster for (i = 0; i < node->numParams - 2; i += 2) {
216 1.2 oster RF_ASSERT(node->params[i + 1].p != ebuf);
217 1.2 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
218 1.2 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
219 1.2 oster scol = rf_EUCol(layoutPtr, pda->raidAddress);
220 1.2 oster srcbuf = (char *) node->params[i + 1].p;
221 1.2 oster destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
222 1.2 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
223 1.2 oster }
224 1.2 oster RF_ETIMER_STOP(timer);
225 1.2 oster RF_ETIMER_EVAL(timer);
226 1.2 oster tracerec->xor_us += RF_ETIMER_VAL_US(timer);
227 1.1 oster }
228 1.1 oster
229 1.1 oster
230 1.1 oster /*******************************************************************************************
231 1.2 oster * Used in EO_001_CreateLargeWriteDAG
232 1.1 oster ******************************************************************************************/
233 1.14 perry int
234 1.2 oster rf_RegularEFunc(node)
235 1.2 oster RF_DagNode_t *node;
236 1.1 oster {
237 1.2 oster rf_RegularESubroutine(node, node->results[0]);
238 1.2 oster rf_GenericWakeupFunc(node, 0);
239 1.1 oster #if 1
240 1.2 oster return (0); /* XXX this was missing?.. GO */
241 1.1 oster #endif
242 1.1 oster }
243 1.1 oster /*******************************************************************************************
244 1.2 oster * This degraded function allow only two case:
245 1.2 oster * 1. when write access the full failed stripe unit, then the access can be more than
246 1.1 oster * one tripe units.
247 1.2 oster * 2. when write access only part of the failed SU, we assume accesses of more than
248 1.2 oster * one stripe unit is not allowed so that the write can be dealt with like a
249 1.2 oster * large write.
250 1.2 oster * The following function is based on these assumptions. So except in the second case,
251 1.1 oster * it looks the same as a large write encodeing function. But this is not exactly the
252 1.2 oster * normal way for doing a degraded write, since raidframe have to break cases of access
253 1.2 oster * other than the above two into smaller accesses. We may have to change
254 1.2 oster * DegrESubroutin in the future.
255 1.1 oster *******************************************************************************************/
256 1.14 perry void
257 1.2 oster rf_DegrESubroutine(node, ebuf)
258 1.2 oster RF_DagNode_t *node;
259 1.2 oster char *ebuf;
260 1.2 oster {
261 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
262 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
263 1.2 oster RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
264 1.2 oster RF_PhysDiskAddr_t *pda;
265 1.2 oster int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
266 1.2 oster RF_RowCol_t scol;
267 1.2 oster char *srcbuf, *destbuf;
268 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
269 1.2 oster RF_Etimer_t timer;
270 1.2 oster
271 1.2 oster RF_ETIMER_START(timer);
272 1.2 oster for (i = 0; i < node->numParams - 2; i += 2) {
273 1.2 oster RF_ASSERT(node->params[i + 1].p != ebuf);
274 1.2 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
275 1.2 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
276 1.2 oster scol = rf_EUCol(layoutPtr, pda->raidAddress);
277 1.2 oster srcbuf = (char *) node->params[i + 1].p;
278 1.2 oster destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
279 1.2 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
280 1.2 oster }
281 1.2 oster
282 1.2 oster RF_ETIMER_STOP(timer);
283 1.2 oster RF_ETIMER_EVAL(timer);
284 1.2 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
285 1.1 oster }
286 1.1 oster
287 1.1 oster
288 1.1 oster /**************************************************************************************
289 1.2 oster * This function is used in case where one data disk failed and both redundant disks
290 1.1 oster * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
291 1.1 oster * failed in the stripe but not accessed at this time, then we should, instead, use
292 1.1 oster * the rf_EOWriteDoubleRecoveryFunc().
293 1.1 oster **************************************************************************************/
294 1.14 perry int
295 1.2 oster rf_Degraded_100_EOFunc(node)
296 1.2 oster RF_DagNode_t *node;
297 1.1 oster {
298 1.2 oster rf_DegrESubroutine(node, node->results[1]);
299 1.2 oster rf_RecoveryXorFunc(node); /* does the wakeup here! */
300 1.1 oster #if 1
301 1.2 oster return (0); /* XXX this was missing... SHould these be
302 1.2 oster * void functions??? GO */
303 1.1 oster #endif
304 1.1 oster }
305 1.1 oster /**************************************************************************************
306 1.1 oster * This function is to encode one sector in one of the data disks to the E disk.
307 1.2 oster * However, in evenodd this function can also be used as decoding function to recover
308 1.1 oster * data from dead disk in the case of parity failure and a single data failure.
309 1.1 oster **************************************************************************************/
310 1.14 perry void
311 1.2 oster rf_e_EncOneSect(
312 1.2 oster RF_RowCol_t srcLogicCol,
313 1.2 oster char *srcSecbuf,
314 1.2 oster RF_RowCol_t destLogicCol,
315 1.2 oster char *destSecbuf,
316 1.2 oster int bytesPerSector)
317 1.1 oster {
318 1.2 oster int S_index; /* index of the EU in the src col which need
319 1.2 oster * be Xored into all EUs in a dest sector */
320 1.2 oster int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
321 1.2 oster RF_RowCol_t j, indexInDest, /* row index of an encoding unit in
322 1.2 oster * the destination colume of encoding
323 1.2 oster * matrix */
324 1.2 oster indexInSrc; /* row index of an encoding unit in the source
325 1.2 oster * colume used for recovery */
326 1.2 oster int bytesPerEU = bytesPerSector / numRowInEncMatix;
327 1.1 oster
328 1.1 oster #if RF_EO_MATRIX_DIM > 17
329 1.2 oster int shortsPerEU = bytesPerEU / sizeof(short);
330 1.2 oster short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
331 1.6 augustss short temp1;
332 1.1 oster #elif RF_EO_MATRIX_DIM == 17
333 1.2 oster int longsPerEU = bytesPerEU / sizeof(long);
334 1.2 oster long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
335 1.6 augustss long temp1;
336 1.1 oster #endif
337 1.1 oster
338 1.1 oster #if RF_EO_MATRIX_DIM > 17
339 1.2 oster RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
340 1.2 oster RF_ASSERT(bytesPerEU % sizeof(short) == 0);
341 1.1 oster #elif RF_EO_MATRIX_DIM == 17
342 1.2 oster RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
343 1.2 oster RF_ASSERT(bytesPerEU % sizeof(long) == 0);
344 1.1 oster #endif
345 1.1 oster
346 1.2 oster S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
347 1.1 oster #if RF_EO_MATRIX_DIM > 17
348 1.2 oster srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
349 1.1 oster #elif RF_EO_MATRIX_DIM == 17
350 1.2 oster srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
351 1.1 oster #endif
352 1.1 oster
353 1.2 oster for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
354 1.2 oster indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
355 1.1 oster
356 1.1 oster #if RF_EO_MATRIX_DIM > 17
357 1.2 oster destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
358 1.2 oster srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
359 1.2 oster for (j = 0; j < shortsPerEU; j++) {
360 1.2 oster temp1 = destShortBuf[j] ^ srcShortBuf1[j];
361 1.2 oster /* note: S_index won't be at the end row for any src
362 1.2 oster * col! */
363 1.2 oster if (indexInSrc != RF_EO_MATRIX_DIM - 1)
364 1.2 oster destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
365 1.2 oster /* if indexInSrc is at the end row, ie.
366 1.2 oster * RF_EO_MATRIX_DIM -1, then all elements are zero! */
367 1.2 oster else
368 1.2 oster destShortBuf[j] = temp1;
369 1.2 oster }
370 1.1 oster
371 1.1 oster #elif RF_EO_MATRIX_DIM == 17
372 1.2 oster destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
373 1.2 oster srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
374 1.2 oster for (j = 0; j < longsPerEU; j++) {
375 1.2 oster temp1 = destLongBuf[j] ^ srcLongBuf1[j];
376 1.2 oster if (indexInSrc != RF_EO_MATRIX_DIM - 1)
377 1.2 oster destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
378 1.2 oster else
379 1.2 oster destLongBuf[j] = temp1;
380 1.2 oster }
381 1.1 oster #endif
382 1.2 oster }
383 1.1 oster }
384 1.1 oster
385 1.14 perry void
386 1.2 oster rf_e_encToBuf(
387 1.2 oster RF_Raid_t * raidPtr,
388 1.2 oster RF_RowCol_t srcLogicCol,
389 1.2 oster char *srcbuf,
390 1.2 oster RF_RowCol_t destLogicCol,
391 1.2 oster char *destbuf,
392 1.2 oster int numSector)
393 1.1 oster {
394 1.2 oster int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
395 1.1 oster
396 1.2 oster for (i = 0; i < numSector; i++) {
397 1.2 oster rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
398 1.2 oster srcbuf += bytesPerSector;
399 1.2 oster destbuf += bytesPerSector;
400 1.2 oster }
401 1.1 oster }
402 1.2 oster /**************************************************************************************
403 1.2 oster * when parity die and one data die, We use second redundant information, 'E',
404 1.2 oster * to recover the data in dead disk. This function is used in the recovery node of
405 1.2 oster * for EO_110_CreateReadDAG
406 1.1 oster **************************************************************************************/
407 1.14 perry int
408 1.2 oster rf_RecoveryEFunc(node)
409 1.2 oster RF_DagNode_t *node;
410 1.2 oster {
411 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
412 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
413 1.2 oster RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
414 1.2 oster RF_RowCol_t scol, /* source logical column */
415 1.2 oster fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of
416 1.2 oster * failed SU */
417 1.2 oster int i;
418 1.2 oster RF_PhysDiskAddr_t *pda;
419 1.2 oster int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
420 1.2 oster char *srcbuf, *destbuf;
421 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
422 1.2 oster RF_Etimer_t timer;
423 1.2 oster
424 1.8 thorpej memset((char *) node->results[0], 0,
425 1.8 thorpej rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
426 1.2 oster if (node->dagHdr->status == rf_enable) {
427 1.2 oster RF_ETIMER_START(timer);
428 1.2 oster for (i = 0; i < node->numParams - 2; i += 2)
429 1.2 oster if (node->params[i + 1].p != node->results[0]) {
430 1.2 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
431 1.2 oster if (i == node->numParams - 4)
432 1.2 oster scol = RF_EO_MATRIX_DIM - 2; /* the colume of
433 1.2 oster * redundant E */
434 1.2 oster else
435 1.2 oster scol = rf_EUCol(layoutPtr, pda->raidAddress);
436 1.2 oster srcbuf = (char *) node->params[i + 1].p;
437 1.2 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
438 1.2 oster destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
439 1.2 oster rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
440 1.2 oster }
441 1.2 oster RF_ETIMER_STOP(timer);
442 1.2 oster RF_ETIMER_EVAL(timer);
443 1.2 oster tracerec->xor_us += RF_ETIMER_VAL_US(timer);
444 1.2 oster }
445 1.2 oster return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
446 1.1 oster }
447 1.1 oster /**************************************************************************************
448 1.1 oster * This function is used in the case where one data and the parity have filed.
449 1.1 oster * (in EO_110_CreateWriteDAG )
450 1.1 oster **************************************************************************************/
451 1.14 perry int
452 1.2 oster rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
453 1.1 oster {
454 1.2 oster rf_DegrESubroutine(node, node->results[0]);
455 1.2 oster rf_GenericWakeupFunc(node, 0);
456 1.1 oster #if 1
457 1.2 oster return (0); /* XXX Yet another one!! GO */
458 1.1 oster #endif
459 1.1 oster }
460 1.1 oster
461 1.1 oster
462 1.2 oster
463 1.1 oster /**************************************************************************************
464 1.1 oster * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
465 1.1 oster **************************************************************************************/
466 1.1 oster
467 1.14 perry void
468 1.2 oster rf_doubleEOdecode(
469 1.2 oster RF_Raid_t * raidPtr,
470 1.2 oster char **rrdbuf,
471 1.2 oster char **dest,
472 1.2 oster RF_RowCol_t * fcol,
473 1.2 oster char *pbuf,
474 1.2 oster char *ebuf)
475 1.2 oster {
476 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
477 1.2 oster int i, j, k, f1, f2, row;
478 1.2 oster int rrdrow, erow, count = 0;
479 1.2 oster int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
480 1.2 oster int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
481 1.1 oster #if 0
482 1.2 oster int pcol = (RF_EO_MATRIX_DIM) - 1;
483 1.1 oster #endif
484 1.2 oster int ecol = (RF_EO_MATRIX_DIM) - 2;
485 1.2 oster int bytesPerEU = bytesPerSector / numRowInEncMatix;
486 1.2 oster int numDataCol = layoutPtr->numDataCol;
487 1.2 oster #if RF_EO_MATRIX_DIM > 17
488 1.2 oster int shortsPerEU = bytesPerEU / sizeof(short);
489 1.2 oster short *rrdbuf_current, *pbuf_current, *ebuf_current;
490 1.2 oster short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
491 1.6 augustss short *temp;
492 1.2 oster short *P;
493 1.2 oster
494 1.2 oster RF_ASSERT(bytesPerEU % sizeof(short) == 0);
495 1.2 oster RF_Malloc(P, bytesPerEU, (short *));
496 1.2 oster RF_Malloc(temp, bytesPerEU, (short *));
497 1.2 oster #elif RF_EO_MATRIX_DIM == 17
498 1.2 oster int longsPerEU = bytesPerEU / sizeof(long);
499 1.2 oster long *rrdbuf_current, *pbuf_current, *ebuf_current;
500 1.2 oster long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
501 1.6 augustss long *temp;
502 1.2 oster long *P;
503 1.2 oster
504 1.2 oster RF_ASSERT(bytesPerEU % sizeof(long) == 0);
505 1.2 oster RF_Malloc(P, bytesPerEU, (long *));
506 1.2 oster RF_Malloc(temp, bytesPerEU, (long *));
507 1.2 oster #endif
508 1.2 oster RF_ASSERT(*((long *) dest[0]) == 0);
509 1.2 oster RF_ASSERT(*((long *) dest[1]) == 0);
510 1.8 thorpej memset((char *) P, 0, bytesPerEU);
511 1.8 thorpej memset((char *) temp, 0, bytesPerEU);
512 1.2 oster RF_ASSERT(*P == 0);
513 1.2 oster /* calculate the 'P' parameter, which, not parity, is the Xor of all
514 1.2 oster * elements in the last two column, ie. 'E' and 'parity' colume, see
515 1.2 oster * the Ref. paper by Blaum, et al 1993 */
516 1.2 oster for (i = 0; i < numRowInEncMatix; i++)
517 1.2 oster for (k = 0; k < longsPerEU; k++) {
518 1.2 oster #if RF_EO_MATRIX_DIM > 17
519 1.2 oster ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
520 1.2 oster pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
521 1.2 oster #elif RF_EO_MATRIX_DIM == 17
522 1.2 oster ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
523 1.2 oster pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
524 1.2 oster #endif
525 1.2 oster P[k] ^= *ebuf_current;
526 1.2 oster P[k] ^= *pbuf_current;
527 1.2 oster }
528 1.2 oster RF_ASSERT(fcol[0] != fcol[1]);
529 1.2 oster if (fcol[0] < fcol[1]) {
530 1.2 oster #if RF_EO_MATRIX_DIM > 17
531 1.2 oster dest_smaller = (short *) (dest[0]);
532 1.2 oster dest_larger = (short *) (dest[1]);
533 1.2 oster #elif RF_EO_MATRIX_DIM == 17
534 1.2 oster dest_smaller = (long *) (dest[0]);
535 1.2 oster dest_larger = (long *) (dest[1]);
536 1.2 oster #endif
537 1.2 oster f1 = fcol[0];
538 1.2 oster f2 = fcol[1];
539 1.2 oster } else {
540 1.2 oster #if RF_EO_MATRIX_DIM > 17
541 1.2 oster dest_smaller = (short *) (dest[1]);
542 1.2 oster dest_larger = (short *) (dest[0]);
543 1.2 oster #elif RF_EO_MATRIX_DIM == 17
544 1.2 oster dest_smaller = (long *) (dest[1]);
545 1.2 oster dest_larger = (long *) (dest[0]);
546 1.2 oster #endif
547 1.2 oster f1 = fcol[1];
548 1.2 oster f2 = fcol[0];
549 1.2 oster }
550 1.2 oster row = (RF_EO_MATRIX_DIM) - 1;
551 1.2 oster while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
552 1.2 oster #if RF_EO_MATRIX_DIM > 17
553 1.2 oster dest_larger_current = dest_larger + row * shortsPerEU;
554 1.2 oster dest_smaller_current = dest_smaller + row * shortsPerEU;
555 1.2 oster #elif RF_EO_MATRIX_DIM == 17
556 1.2 oster dest_larger_current = dest_larger + row * longsPerEU;
557 1.2 oster dest_smaller_current = dest_smaller + row * longsPerEU;
558 1.2 oster #endif
559 1.2 oster /** Do the diagonal recovery. Initially, temp[k] = (failed 1),
560 1.2 oster which is the failed data in the colume which has smaller col index. **/
561 1.2 oster /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
562 1.2 oster for (j = 0; j < numDataCol; j++) {
563 1.2 oster if (j == f1 || j == f2)
564 1.2 oster continue;
565 1.2 oster rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
566 1.2 oster if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
567 1.2 oster #if RF_EO_MATRIX_DIM > 17
568 1.2 oster rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
569 1.2 oster for (k = 0; k < shortsPerEU; k++)
570 1.2 oster temp[k] ^= *(rrdbuf_current + k);
571 1.2 oster #elif RF_EO_MATRIX_DIM == 17
572 1.2 oster rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
573 1.2 oster for (k = 0; k < longsPerEU; k++)
574 1.2 oster temp[k] ^= *(rrdbuf_current + k);
575 1.2 oster #endif
576 1.2 oster }
577 1.2 oster }
578 1.2 oster /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't
579 1.2 oster * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed
580 1.2 oster * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
581 1.2 oster * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
582 1.2 oster * diagonal) ^ (failed 2) */
583 1.2 oster
584 1.2 oster erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
585 1.2 oster if (erow != (RF_EO_MATRIX_DIM) - 1) {
586 1.2 oster #if RF_EO_MATRIX_DIM > 17
587 1.2 oster ebuf_current = (short *) ebuf + shortsPerEU * erow;
588 1.2 oster for (k = 0; k < shortsPerEU; k++)
589 1.2 oster temp[k] ^= *(ebuf_current + k);
590 1.2 oster #elif RF_EO_MATRIX_DIM == 17
591 1.2 oster ebuf_current = (long *) ebuf + longsPerEU * erow;
592 1.2 oster for (k = 0; k < longsPerEU; k++)
593 1.2 oster temp[k] ^= *(ebuf_current + k);
594 1.2 oster #endif
595 1.2 oster }
596 1.2 oster /* step 3: ^P to obtain the failed data (failed 2). P can be
597 1.2 oster * proved to be actually (principle diagonal) After this
598 1.2 oster * step, temp[k] = (failed 2), the failed data to be recovered */
599 1.2 oster #if RF_EO_MATRIX_DIM > 17
600 1.2 oster for (k = 0; k < shortsPerEU; k++)
601 1.2 oster temp[k] ^= P[k];
602 1.2 oster /* Put the data to the destination buffer */
603 1.2 oster for (k = 0; k < shortsPerEU; k++)
604 1.2 oster dest_larger_current[k] = temp[k];
605 1.2 oster #elif RF_EO_MATRIX_DIM == 17
606 1.2 oster for (k = 0; k < longsPerEU; k++)
607 1.2 oster temp[k] ^= P[k];
608 1.2 oster /* Put the data to the destination buffer */
609 1.2 oster for (k = 0; k < longsPerEU; k++)
610 1.2 oster dest_larger_current[k] = temp[k];
611 1.2 oster #endif
612 1.2 oster
613 1.2 oster /** THE FOLLOWING DO THE HORIZONTAL XOR **/
614 1.2 oster /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data
615 1.2 oster * columes */
616 1.2 oster for (j = 0; j < numDataCol; j++) {
617 1.2 oster if (j == f1 || j == f2)
618 1.2 oster continue;
619 1.2 oster #if RF_EO_MATRIX_DIM > 17
620 1.2 oster rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
621 1.2 oster for (k = 0; k < shortsPerEU; k++)
622 1.2 oster temp[k] ^= *(rrdbuf_current + k);
623 1.2 oster #elif RF_EO_MATRIX_DIM == 17
624 1.2 oster rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
625 1.2 oster for (k = 0; k < longsPerEU; k++)
626 1.2 oster temp[k] ^= *(rrdbuf_current + k);
627 1.2 oster #endif
628 1.2 oster }
629 1.2 oster /* step 2: ^A(row,m-1) */
630 1.2 oster /* step 3: Put the data to the destination buffer */
631 1.2 oster #if RF_EO_MATRIX_DIM > 17
632 1.2 oster pbuf_current = (short *) pbuf + shortsPerEU * row;
633 1.2 oster for (k = 0; k < shortsPerEU; k++)
634 1.2 oster temp[k] ^= *(pbuf_current + k);
635 1.2 oster for (k = 0; k < shortsPerEU; k++)
636 1.2 oster dest_smaller_current[k] = temp[k];
637 1.2 oster #elif RF_EO_MATRIX_DIM == 17
638 1.2 oster pbuf_current = (long *) pbuf + longsPerEU * row;
639 1.2 oster for (k = 0; k < longsPerEU; k++)
640 1.2 oster temp[k] ^= *(pbuf_current + k);
641 1.2 oster for (k = 0; k < longsPerEU; k++)
642 1.2 oster dest_smaller_current[k] = temp[k];
643 1.2 oster #endif
644 1.2 oster count++;
645 1.2 oster }
646 1.2 oster /* Check if all Encoding Unit in the data buffer have been decoded,
647 1.2 oster * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
648 1.2 oster * this algorithm will covered all buffer */
649 1.2 oster RF_ASSERT(count == numRowInEncMatix);
650 1.2 oster RF_Free((char *) P, bytesPerEU);
651 1.2 oster RF_Free((char *) temp, bytesPerEU);
652 1.1 oster }
653 1.2 oster
654 1.1 oster
655 1.1 oster /***************************************************************************************
656 1.1 oster * This function is called by double degragded read
657 1.2 oster * EO_200_CreateReadDAG
658 1.1 oster *
659 1.1 oster ***************************************************************************************/
660 1.14 perry int
661 1.2 oster rf_EvenOddDoubleRecoveryFunc(node)
662 1.2 oster RF_DagNode_t *node;
663 1.2 oster {
664 1.2 oster int ndataParam = 0;
665 1.2 oster int np = node->numParams;
666 1.2 oster RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
667 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
668 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
669 1.2 oster int i, prm, sector, nresults = node->numResults;
670 1.2 oster RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
671 1.2 oster unsigned sosAddr;
672 1.2 oster int two = 0, mallc_one = 0, mallc_two = 0; /* flags to indicate if
673 1.2 oster * memory is allocated */
674 1.2 oster int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
675 1.2 oster RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
676 1.2 oster npda;
677 1.2 oster RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
678 1.2 oster char **buf, *ebuf, *pbuf, *dest[2];
679 1.17 christos long *suoff = NULL, *suend = NULL, *prmToCol = NULL,
680 1.17 christos psuoff = 0, esuoff = 0;
681 1.2 oster RF_SectorNum_t startSector, endSector;
682 1.2 oster RF_Etimer_t timer;
683 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
684 1.2 oster
685 1.2 oster RF_ETIMER_START(timer);
686 1.2 oster
687 1.2 oster /* Find out the number of parameters which are pdas for data
688 1.2 oster * information */
689 1.2 oster for (i = 0; i <= np; i++)
690 1.2 oster if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
691 1.2 oster ndataParam = i;
692 1.2 oster break;
693 1.2 oster }
694 1.2 oster RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
695 1.2 oster if (ndataParam != 0) {
696 1.2 oster RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
697 1.2 oster RF_Malloc(suend, ndataParam * sizeof(long), (long *));
698 1.2 oster RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
699 1.2 oster }
700 1.2 oster if (asmap->failedPDAs[1] &&
701 1.2 oster (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
702 1.2 oster RF_ASSERT(0); /* currently, no support for this situation */
703 1.2 oster ppda = node->params[np - 6].p;
704 1.2 oster ppda2 = node->params[np - 5].p;
705 1.2 oster RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
706 1.2 oster epda = node->params[np - 4].p;
707 1.2 oster epda2 = node->params[np - 3].p;
708 1.2 oster RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
709 1.2 oster two = 1;
710 1.2 oster } else {
711 1.2 oster ppda = node->params[np - 4].p;
712 1.2 oster epda = node->params[np - 3].p;
713 1.2 oster psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
714 1.2 oster esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
715 1.2 oster RF_ASSERT(psuoff == esuoff);
716 1.2 oster }
717 1.2 oster /*
718 1.2 oster the followings have three goals:
719 1.2 oster 1. determine the startSector to begin decoding and endSector to end decoding.
720 1.2 oster 2. determine the colume numbers of the two failed disks.
721 1.2 oster 3. determine the offset and end offset of the access within each failed stripe unit.
722 1.2 oster */
723 1.2 oster if (nresults == 1) {
724 1.2 oster /* find the startSector to begin decoding */
725 1.2 oster pda = node->results[0];
726 1.8 thorpej memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
727 1.2 oster fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
728 1.2 oster fsuend[0] = fsuoff[0] + pda->numSector;
729 1.17 christos fsuoff[1] = 0;
730 1.17 christos fsuend[1] = 0;
731 1.2 oster startSector = fsuoff[0];
732 1.2 oster endSector = fsuend[0];
733 1.2 oster
734 1.5 soren /* find out the column of failed disk being accessed */
735 1.2 oster fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
736 1.2 oster
737 1.2 oster /* find out the other failed colume not accessed */
738 1.2 oster sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
739 1.2 oster for (i = 0; i < numDataCol; i++) {
740 1.2 oster npda.raidAddress = sosAddr + (i * secPerSU);
741 1.13 oster (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
742 1.2 oster /* skip over dead disks */
743 1.13 oster if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
744 1.2 oster if (i != fcol[0])
745 1.2 oster break;
746 1.2 oster }
747 1.2 oster RF_ASSERT(i < numDataCol);
748 1.2 oster fcol[1] = i;
749 1.2 oster } else {
750 1.2 oster RF_ASSERT(nresults == 2);
751 1.2 oster pda0 = node->results[0];
752 1.8 thorpej memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
753 1.2 oster pda1 = node->results[1];
754 1.8 thorpej memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
755 1.2 oster /* determine the failed colume numbers of the two failed
756 1.2 oster * disks. */
757 1.2 oster fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
758 1.2 oster fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
759 1.2 oster /* determine the offset and end offset of the access within
760 1.2 oster * each failed stripe unit. */
761 1.2 oster fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
762 1.2 oster fsuend[0] = fsuoff[0] + pda0->numSector;
763 1.2 oster fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
764 1.2 oster fsuend[1] = fsuoff[1] + pda1->numSector;
765 1.2 oster /* determine the startSector to begin decoding */
766 1.2 oster startSector = RF_MIN(pda0->startSector, pda1->startSector);
767 1.2 oster /* determine the endSector to end decoding */
768 1.2 oster endSector = RF_MAX(fsuend[0], fsuend[1]);
769 1.2 oster }
770 1.2 oster /*
771 1.2 oster assign the beginning sector and the end sector for each parameter
772 1.2 oster find out the corresponding colume # for each parameter
773 1.2 oster */
774 1.2 oster for (prm = 0; prm < ndataParam; prm++) {
775 1.2 oster pda = node->params[prm].p;
776 1.2 oster suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
777 1.2 oster suend[prm] = suoff[prm] + pda->numSector;
778 1.2 oster prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
779 1.2 oster }
780 1.2 oster /* 'sector' is the sector for the current decoding algorithm. For each
781 1.2 oster * sector in the failed SU, find out the corresponding parameters that
782 1.2 oster * cover the current sector and that are needed for decoding of this
783 1.2 oster * sector in failed SU. 2. Find out if sector is in the shadow of any
784 1.2 oster * accessed failed SU. If not, malloc a temporary space of a sector in
785 1.2 oster * size. */
786 1.2 oster for (sector = startSector; sector < endSector; sector++) {
787 1.2 oster if (nresults == 2)
788 1.2 oster if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
789 1.2 oster continue;
790 1.2 oster for (prm = 0; prm < ndataParam; prm++)
791 1.2 oster if (suoff[prm] <= sector && sector < suend[prm])
792 1.18 christos buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
793 1.2 oster rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
794 1.2 oster /* find out if sector is in the shadow of any accessed failed
795 1.2 oster * SU. If yes, assign dest[0], dest[1] to point at suitable
796 1.2 oster * position of the buffer corresponding to failed SUs. if no,
797 1.2 oster * malloc a temporary space of a sector in size for
798 1.2 oster * destination of decoding. */
799 1.2 oster RF_ASSERT(nresults == 1 || nresults == 2);
800 1.2 oster if (nresults == 1) {
801 1.18 christos dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
802 1.2 oster /* Always malloc temp buffer to dest[1] */
803 1.2 oster RF_Malloc(dest[1], bytesPerSector, (char *));
804 1.8 thorpej memset(dest[1], 0, bytesPerSector);
805 1.2 oster mallc_two = 1;
806 1.2 oster } else {
807 1.2 oster if (fsuoff[0] <= sector && sector < fsuend[0])
808 1.18 christos dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
809 1.2 oster else {
810 1.2 oster RF_Malloc(dest[0], bytesPerSector, (char *));
811 1.8 thorpej memset(dest[0], 0, bytesPerSector);
812 1.2 oster mallc_one = 1;
813 1.2 oster }
814 1.2 oster if (fsuoff[1] <= sector && sector < fsuend[1])
815 1.18 christos dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
816 1.2 oster else {
817 1.2 oster RF_Malloc(dest[1], bytesPerSector, (char *));
818 1.8 thorpej memset(dest[1], 0, bytesPerSector);
819 1.2 oster mallc_two = 1;
820 1.2 oster }
821 1.2 oster RF_ASSERT(mallc_one == 0 || mallc_two == 0);
822 1.2 oster }
823 1.18 christos pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
824 1.18 christos ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
825 1.2 oster /*
826 1.2 oster * After finish finding all needed sectors, call doubleEOdecode function for decoding
827 1.2 oster * one sector to destination.
828 1.2 oster */
829 1.2 oster rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
830 1.2 oster /* free all allocated memory, and mark flag to indicate no
831 1.2 oster * memory is being allocated */
832 1.2 oster if (mallc_one == 1)
833 1.2 oster RF_Free(dest[0], bytesPerSector);
834 1.2 oster if (mallc_two == 1)
835 1.2 oster RF_Free(dest[1], bytesPerSector);
836 1.2 oster mallc_one = mallc_two = 0;
837 1.2 oster }
838 1.2 oster RF_Free(buf, numDataCol * sizeof(char *));
839 1.2 oster if (ndataParam != 0) {
840 1.2 oster RF_Free(suoff, ndataParam * sizeof(long));
841 1.2 oster RF_Free(suend, ndataParam * sizeof(long));
842 1.2 oster RF_Free(prmToCol, ndataParam * sizeof(long));
843 1.2 oster }
844 1.2 oster RF_ETIMER_STOP(timer);
845 1.2 oster RF_ETIMER_EVAL(timer);
846 1.2 oster if (tracerec) {
847 1.2 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
848 1.2 oster }
849 1.2 oster rf_GenericWakeupFunc(node, 0);
850 1.1 oster #if 1
851 1.2 oster return (0); /* XXX is this even close!!?!?!!? GO */
852 1.1 oster #endif
853 1.1 oster }
854 1.1 oster
855 1.1 oster
856 1.2 oster /* currently, only access of one of the two failed SU is allowed in this function.
857 1.2 oster * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
858 1.1 oster * many accesses of single stripe unit.
859 1.1 oster */
860 1.1 oster
861 1.14 perry int
862 1.2 oster rf_EOWriteDoubleRecoveryFunc(node)
863 1.2 oster RF_DagNode_t *node;
864 1.2 oster {
865 1.2 oster int np = node->numParams;
866 1.2 oster RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
867 1.2 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
868 1.2 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
869 1.2 oster RF_SectorNum_t sector;
870 1.2 oster RF_RowCol_t col, scol;
871 1.2 oster int prm, i, j;
872 1.2 oster RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
873 1.2 oster unsigned sosAddr;
874 1.2 oster unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
875 1.2 oster RF_int64 numbytes;
876 1.2 oster RF_SectorNum_t startSector, endSector;
877 1.2 oster RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
878 1.2 oster RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
879 1.2 oster char **buf; /* buf[0], buf[1], buf[2], ...etc. point to
880 1.2 oster * buffer storing data read from col0, col1,
881 1.2 oster * col2 */
882 1.2 oster char *ebuf, *pbuf, *dest[2], *olddata[2];
883 1.2 oster RF_Etimer_t timer;
884 1.2 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
885 1.2 oster
886 1.2 oster RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this
887 1.2 oster * case, the other failed SU
888 1.2 oster * is not being accessed */
889 1.2 oster RF_ETIMER_START(timer);
890 1.2 oster RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
891 1.2 oster
892 1.2 oster ppda = node->results[0];/* Instead of being buffers, node->results[0]
893 1.2 oster * and [1] are Ppda and Epda */
894 1.2 oster epda = node->results[1];
895 1.2 oster fpda = asmap->failedPDAs[0];
896 1.2 oster
897 1.2 oster /* First, recovery the failed old SU using EvenOdd double decoding */
898 1.2 oster /* determine the startSector and endSector for decoding */
899 1.2 oster startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
900 1.2 oster endSector = startSector + fpda->numSector;
901 1.2 oster /* Assign buf[col] pointers to point to each non-failed colume and
902 1.2 oster * initialize the pbuf and ebuf to point at the beginning of each
903 1.2 oster * source buffers and destination buffers */
904 1.2 oster for (prm = 0; prm < numDataCol - 2; prm++) {
905 1.2 oster pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
906 1.2 oster col = rf_EUCol(layoutPtr, pda->raidAddress);
907 1.2 oster buf[col] = pda->bufPtr;
908 1.2 oster }
909 1.2 oster /* pbuf and ebuf: they will change values as double recovery decoding
910 1.2 oster * goes on */
911 1.2 oster pbuf = ppda->bufPtr;
912 1.2 oster ebuf = epda->bufPtr;
913 1.2 oster /* find out the logical colume numbers in the encoding matrix of the
914 1.2 oster * two failed columes */
915 1.2 oster fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
916 1.2 oster
917 1.2 oster /* find out the other failed colume not accessed this time */
918 1.2 oster sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
919 1.2 oster for (i = 0; i < numDataCol; i++) {
920 1.2 oster npda.raidAddress = sosAddr + (i * secPerSU);
921 1.13 oster (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
922 1.2 oster /* skip over dead disks */
923 1.13 oster if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
924 1.2 oster if (i != fcol[0])
925 1.2 oster break;
926 1.2 oster }
927 1.2 oster RF_ASSERT(i < numDataCol);
928 1.2 oster fcol[1] = i;
929 1.2 oster /* assign temporary space to put recovered failed SU */
930 1.2 oster numbytes = fpda->numSector * bytesPerSector;
931 1.2 oster RF_Malloc(olddata[0], numbytes, (char *));
932 1.2 oster RF_Malloc(olddata[1], numbytes, (char *));
933 1.2 oster dest[0] = olddata[0];
934 1.2 oster dest[1] = olddata[1];
935 1.8 thorpej memset(olddata[0], 0, numbytes);
936 1.8 thorpej memset(olddata[1], 0, numbytes);
937 1.2 oster /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j]
938 1.2 oster * have already pointed at the beginning of each source buffers and
939 1.2 oster * destination buffers */
940 1.2 oster for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
941 1.2 oster rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
942 1.2 oster for (j = 0; j < numDataCol; j++)
943 1.2 oster if ((j != fcol[0]) && (j != fcol[1]))
944 1.2 oster buf[j] += bytesPerSector;
945 1.2 oster dest[0] += bytesPerSector;
946 1.2 oster dest[1] += bytesPerSector;
947 1.2 oster ebuf += bytesPerSector;
948 1.2 oster pbuf += bytesPerSector;
949 1.2 oster }
950 1.2 oster /* after recovery, the buffer pointed by olddata[0] is the old failed
951 1.2 oster * data. With new writing data and this old data, use small write to
952 1.2 oster * calculate the new redundant informations */
953 1.2 oster /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
954 1.2 oster * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
955 1.2 oster * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
956 1.2 oster * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
957 1.2 oster * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
958 1.2 oster * wudNodes; For current implementation, we assume the simplest case:
959 1.2 oster * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
960 1.2 oster * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
961 1.2 oster * data to be writen to the failed disk. We first bxor the new data
962 1.2 oster * into the old recovered data, then do the same things as small
963 1.2 oster * write. */
964 1.2 oster
965 1.16 oster rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
966 1.2 oster /* do new 'E' calculation */
967 1.2 oster /* find out the corresponding colume in encoding matrix for write
968 1.2 oster * colume to be encoded into redundant disk 'E' */
969 1.2 oster scol = rf_EUCol(layoutPtr, fpda->raidAddress);
970 1.2 oster /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
971 1.2 oster * buffer pointer */
972 1.2 oster rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
973 1.2 oster
974 1.2 oster /* do new 'P' calculation */
975 1.16 oster rf_bxor(olddata[0], ppda->bufPtr, numbytes);
976 1.2 oster /* Free the allocated buffer */
977 1.2 oster RF_Free(olddata[0], numbytes);
978 1.2 oster RF_Free(olddata[1], numbytes);
979 1.2 oster RF_Free(buf, numDataCol * sizeof(char *));
980 1.2 oster
981 1.2 oster RF_ETIMER_STOP(timer);
982 1.2 oster RF_ETIMER_EVAL(timer);
983 1.2 oster if (tracerec) {
984 1.2 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
985 1.2 oster }
986 1.2 oster rf_GenericWakeupFunc(node, 0);
987 1.2 oster return (0);
988 1.1 oster }
989 1.7 oster #endif /* RF_INCLUDE_EVENODD > 0 */
990