rf_evenodd_dagfuncs.c revision 1.1 1 1.1 oster /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: ChangMing Wu
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * Code for RAID-EVENODD architecture.
31 1.1 oster */
32 1.1 oster
33 1.1 oster #include "rf_types.h"
34 1.1 oster #include "rf_raid.h"
35 1.1 oster #include "rf_dag.h"
36 1.1 oster #include "rf_dagffrd.h"
37 1.1 oster #include "rf_dagffwr.h"
38 1.1 oster #include "rf_dagdegrd.h"
39 1.1 oster #include "rf_dagdegwr.h"
40 1.1 oster #include "rf_dagutils.h"
41 1.1 oster #include "rf_dagfuncs.h"
42 1.1 oster #include "rf_threadid.h"
43 1.1 oster #include "rf_etimer.h"
44 1.1 oster #include "rf_general.h"
45 1.1 oster #include "rf_configure.h"
46 1.1 oster #include "rf_parityscan.h"
47 1.1 oster #include "rf_sys.h"
48 1.1 oster #include "rf_evenodd.h"
49 1.1 oster #include "rf_evenodd_dagfuncs.h"
50 1.1 oster
51 1.1 oster /* These redundant functions are for small write */
52 1.1 oster RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" };
53 1.1 oster RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E" };
54 1.1 oster
55 1.1 oster /* These redundant functions are for degraded read */
56 1.1 oster RF_RedFuncs_t rf_eoPRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
57 1.1 oster RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" };
58 1.1 oster
59 1.1 oster /**********************************************************************************************
60 1.1 oster * the following encoding node functions is used in EO_000_CreateLargeWriteDAG
61 1.1 oster **********************************************************************************************/
62 1.1 oster int rf_RegularPEFunc(node)
63 1.1 oster RF_DagNode_t *node;
64 1.1 oster {
65 1.1 oster rf_RegularESubroutine(node,node->results[1]);
66 1.1 oster rf_RegularXorFunc(node); /* does the wakeup here! */
67 1.1 oster #if 1
68 1.1 oster return(0); /* XXX This was missing... GO */
69 1.1 oster #endif
70 1.1 oster }
71 1.1 oster
72 1.1 oster
73 1.1 oster /************************************************************************************************
74 1.1 oster * For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
75 1.1 oster * be used. The former is used when the write accesses at least the sectors of a full stripe unit.
76 1.1 oster * The latter is used when the write accesses two stripe units but with total sectors
77 1.1 oster * less than sectors per SU. In this case, the accesses of parity and 'E' appear as disconnected
78 1.1 oster * areas in their stripe units, and the parity write and 'E' write are both divided into two distinct
79 1.1 oster * writes (four in total). The simple old-new write and regular old-new write happen as in RAID-5.
80 1.1 oster ************************************************************************************************/
81 1.1 oster
82 1.1 oster /* Algorithm:
83 1.1 oster 1. Store the difference of old data and new data in the Rod buffer.
84 1.1 oster 2. then encode this buffer into the buffer which already have old 'E' information inside it,
85 1.1 oster the result can be shown to be the new 'E' information.
86 1.1 oster 3. xor the Wnd buffer into the difference buffer to recover the original old data.
87 1.1 oster Here we have another alternative: to allocate a temporary buffer for storing the difference of
88 1.1 oster old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
89 1.1 oster take the same speed as the previous, and need more memory.
90 1.1 oster */
91 1.1 oster int rf_RegularONEFunc(node)
92 1.1 oster RF_DagNode_t *node;
93 1.1 oster {
94 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
95 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
96 1.1 oster int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */
97 1.1 oster int i, k, retcode = 0;
98 1.1 oster int suoffset, length;
99 1.1 oster RF_RowCol_t scol;
100 1.1 oster char *srcbuf, *destbuf;
101 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
102 1.1 oster RF_Etimer_t timer;
103 1.1 oster RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
104 1.1 oster int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */
105 1.1 oster
106 1.1 oster RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q );
107 1.1 oster RF_ASSERT(ESUOffset == 0);
108 1.1 oster
109 1.1 oster RF_ETIMER_START(timer);
110 1.1 oster
111 1.1 oster /* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */
112 1.1 oster for( k=0; k< EpdaIndex; k += 2) {
113 1.1 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
114 1.1 oster retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
115 1.1 oster }
116 1.1 oster /* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer */
117 1.1 oster for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */
118 1.1 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
119 1.1 oster srcbuf = (char *) node->params[i+1].p;
120 1.1 oster scol = rf_EUCol(layoutPtr, pda->raidAddress );
121 1.1 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
122 1.1 oster destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset);
123 1.1 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
124 1.1 oster }
125 1.1 oster /* Recover the original old data to be used by parity encoding function in XorNode */
126 1.1 oster for( k=0; k< EpdaIndex; k += 2) {
127 1.1 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
128 1.1 oster retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
129 1.1 oster }
130 1.1 oster RF_ETIMER_STOP(timer);
131 1.1 oster RF_ETIMER_EVAL(timer);
132 1.1 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
133 1.1 oster rf_GenericWakeupFunc(node, 0);
134 1.1 oster #if 1
135 1.1 oster return(0); /* XXX this was missing.. GO */
136 1.1 oster #endif
137 1.1 oster }
138 1.1 oster
139 1.1 oster int rf_SimpleONEFunc(node)
140 1.1 oster RF_DagNode_t *node;
141 1.1 oster {
142 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
143 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
144 1.1 oster RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
145 1.1 oster int retcode = 0;
146 1.1 oster char *srcbuf, *destbuf;
147 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
148 1.1 oster int length;
149 1.1 oster RF_RowCol_t scol;
150 1.1 oster RF_Etimer_t timer;
151 1.1 oster
152 1.1 oster RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q );
153 1.1 oster if (node->dagHdr->status == rf_enable) {
154 1.1 oster RF_ETIMER_START(timer);
155 1.1 oster length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[4].p)->numSector );/* this is a pda of writeDataNodes */
156 1.1 oster /* bxor to buffer of readDataNodes */
157 1.1 oster retcode = rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
158 1.1 oster /* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
159 1.1 oster scol = rf_EUCol(layoutPtr, pda->raidAddress );
160 1.1 oster srcbuf = node->params[1].p;
161 1.1 oster destbuf = node->params[3].p;
162 1.1 oster /* Start encoding process */
163 1.1 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
164 1.1 oster rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
165 1.1 oster RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
166 1.1 oster
167 1.1 oster }
168 1.1 oster return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */
169 1.1 oster }
170 1.1 oster
171 1.1 oster
172 1.1 oster /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/
173 1.1 oster void rf_RegularESubroutine(node, ebuf)
174 1.1 oster RF_DagNode_t *node;
175 1.1 oster char *ebuf;
176 1.1 oster {
177 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
178 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
179 1.1 oster RF_PhysDiskAddr_t *pda;
180 1.1 oster int i, suoffset;
181 1.1 oster RF_RowCol_t scol;
182 1.1 oster char *srcbuf, *destbuf;
183 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
184 1.1 oster RF_Etimer_t timer;
185 1.1 oster
186 1.1 oster RF_ETIMER_START(timer);
187 1.1 oster for (i=0; i<node->numParams-2; i+=2) {
188 1.1 oster RF_ASSERT( node->params[i+1].p != ebuf );
189 1.1 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
190 1.1 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
191 1.1 oster scol = rf_EUCol(layoutPtr, pda->raidAddress );
192 1.1 oster srcbuf = (char *) node->params[i+1].p;
193 1.1 oster destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset );
194 1.1 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
195 1.1 oster }
196 1.1 oster RF_ETIMER_STOP(timer);
197 1.1 oster RF_ETIMER_EVAL(timer);
198 1.1 oster tracerec->xor_us += RF_ETIMER_VAL_US(timer);
199 1.1 oster }
200 1.1 oster
201 1.1 oster
202 1.1 oster /*******************************************************************************************
203 1.1 oster * Used in EO_001_CreateLargeWriteDAG
204 1.1 oster ******************************************************************************************/
205 1.1 oster int rf_RegularEFunc(node)
206 1.1 oster RF_DagNode_t *node;
207 1.1 oster {
208 1.1 oster rf_RegularESubroutine(node, node->results[0]);
209 1.1 oster rf_GenericWakeupFunc(node, 0);
210 1.1 oster #if 1
211 1.1 oster return(0); /* XXX this was missing?.. GO */
212 1.1 oster #endif
213 1.1 oster }
214 1.1 oster
215 1.1 oster /*******************************************************************************************
216 1.1 oster * This degraded function allow only two case:
217 1.1 oster * 1. when write access the full failed stripe unit, then the access can be more than
218 1.1 oster * one tripe units.
219 1.1 oster * 2. when write access only part of the failed SU, we assume accesses of more than
220 1.1 oster * one stripe unit is not allowed so that the write can be dealt with like a
221 1.1 oster * large write.
222 1.1 oster * The following function is based on these assumptions. So except in the second case,
223 1.1 oster * it looks the same as a large write encodeing function. But this is not exactly the
224 1.1 oster * normal way for doing a degraded write, since raidframe have to break cases of access
225 1.1 oster * other than the above two into smaller accesses. We may have to change
226 1.1 oster * DegrESubroutin in the future.
227 1.1 oster *******************************************************************************************/
228 1.1 oster void rf_DegrESubroutine(node, ebuf)
229 1.1 oster RF_DagNode_t *node;
230 1.1 oster char *ebuf;
231 1.1 oster {
232 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
233 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
234 1.1 oster RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
235 1.1 oster RF_PhysDiskAddr_t *pda;
236 1.1 oster int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
237 1.1 oster RF_RowCol_t scol;
238 1.1 oster char *srcbuf, *destbuf;
239 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
240 1.1 oster RF_Etimer_t timer;
241 1.1 oster
242 1.1 oster RF_ETIMER_START(timer);
243 1.1 oster for (i=0; i<node->numParams-2; i+=2) {
244 1.1 oster RF_ASSERT( node->params[i+1].p != ebuf );
245 1.1 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
246 1.1 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
247 1.1 oster scol = rf_EUCol(layoutPtr, pda->raidAddress );
248 1.1 oster srcbuf = (char *) node->params[i+1].p;
249 1.1 oster destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset);
250 1.1 oster rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
251 1.1 oster }
252 1.1 oster
253 1.1 oster RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
254 1.1 oster }
255 1.1 oster
256 1.1 oster
257 1.1 oster /**************************************************************************************
258 1.1 oster * This function is used in case where one data disk failed and both redundant disks
259 1.1 oster * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
260 1.1 oster * failed in the stripe but not accessed at this time, then we should, instead, use
261 1.1 oster * the rf_EOWriteDoubleRecoveryFunc().
262 1.1 oster **************************************************************************************/
263 1.1 oster int rf_Degraded_100_EOFunc(node)
264 1.1 oster RF_DagNode_t *node;
265 1.1 oster {
266 1.1 oster rf_DegrESubroutine(node, node->results[1]);
267 1.1 oster rf_RecoveryXorFunc(node); /* does the wakeup here! */
268 1.1 oster #if 1
269 1.1 oster return(0); /* XXX this was missing... SHould these be void functions??? GO */
270 1.1 oster #endif
271 1.1 oster }
272 1.1 oster
273 1.1 oster /**************************************************************************************
274 1.1 oster * This function is to encode one sector in one of the data disks to the E disk.
275 1.1 oster * However, in evenodd this function can also be used as decoding function to recover
276 1.1 oster * data from dead disk in the case of parity failure and a single data failure.
277 1.1 oster **************************************************************************************/
278 1.1 oster void rf_e_EncOneSect(
279 1.1 oster RF_RowCol_t srcLogicCol,
280 1.1 oster char *srcSecbuf,
281 1.1 oster RF_RowCol_t destLogicCol,
282 1.1 oster char *destSecbuf,
283 1.1 oster int bytesPerSector)
284 1.1 oster {
285 1.1 oster int S_index; /* index of the EU in the src col which need be Xored into all EUs in a dest sector */
286 1.1 oster int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
287 1.1 oster RF_RowCol_t j, indexInDest, /* row index of an encoding unit in the destination colume of encoding matrix */
288 1.1 oster indexInSrc; /* row index of an encoding unit in the source colume used for recovery */
289 1.1 oster int bytesPerEU = bytesPerSector/numRowInEncMatix;
290 1.1 oster
291 1.1 oster #if RF_EO_MATRIX_DIM > 17
292 1.1 oster int shortsPerEU = bytesPerEU/sizeof(short);
293 1.1 oster short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
294 1.1 oster register short temp1;
295 1.1 oster #elif RF_EO_MATRIX_DIM == 17
296 1.1 oster int longsPerEU = bytesPerEU/sizeof(long);
297 1.1 oster long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
298 1.1 oster register long temp1;
299 1.1 oster #endif
300 1.1 oster
301 1.1 oster #if RF_EO_MATRIX_DIM > 17
302 1.1 oster RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 );
303 1.1 oster RF_ASSERT( bytesPerEU % sizeof(short) == 0 );
304 1.1 oster #elif RF_EO_MATRIX_DIM == 17
305 1.1 oster RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 );
306 1.1 oster RF_ASSERT( bytesPerEU % sizeof(long) == 0);
307 1.1 oster #endif
308 1.1 oster
309 1.1 oster S_index = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
310 1.1 oster #if RF_EO_MATRIX_DIM > 17
311 1.1 oster srcShortBuf1 = (short *)(srcSecbuf + S_index * bytesPerEU);
312 1.1 oster #elif RF_EO_MATRIX_DIM == 17
313 1.1 oster srcLongBuf1 = (long *)(srcSecbuf + S_index * bytesPerEU);
314 1.1 oster #endif
315 1.1 oster
316 1.1 oster for( indexInDest = 0; indexInDest < numRowInEncMatix ; indexInDest++){
317 1.1 oster indexInSrc = rf_EO_Mod( (indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM );
318 1.1 oster
319 1.1 oster #if RF_EO_MATRIX_DIM > 17
320 1.1 oster destShortBuf = (short *)(destSecbuf + indexInDest * bytesPerEU);
321 1.1 oster srcShortBuf2 = (short *)(srcSecbuf + indexInSrc * bytesPerEU);
322 1.1 oster for(j=0; j < shortsPerEU; j++) {
323 1.1 oster temp1 = destShortBuf[j]^srcShortBuf1[j];
324 1.1 oster /* note: S_index won't be at the end row for any src col! */
325 1.1 oster if(indexInSrc != RF_EO_MATRIX_DIM -1) destShortBuf[j] = (srcShortBuf2[j])^temp1;
326 1.1 oster /* if indexInSrc is at the end row, ie. RF_EO_MATRIX_DIM -1, then all elements are zero! */
327 1.1 oster else destShortBuf[j] = temp1;
328 1.1 oster }
329 1.1 oster
330 1.1 oster #elif RF_EO_MATRIX_DIM == 17
331 1.1 oster destLongBuf = (long *)(destSecbuf + indexInDest * bytesPerEU);
332 1.1 oster srcLongBuf2 = (long *)(srcSecbuf + indexInSrc * bytesPerEU);
333 1.1 oster for(j=0; j < longsPerEU; j++) {
334 1.1 oster temp1 = destLongBuf[j]^srcLongBuf1[j];
335 1.1 oster if(indexInSrc != RF_EO_MATRIX_DIM -1) destLongBuf[j] = (srcLongBuf2[j])^temp1;
336 1.1 oster else destLongBuf[j] = temp1;
337 1.1 oster }
338 1.1 oster #endif
339 1.1 oster }
340 1.1 oster }
341 1.1 oster
342 1.1 oster void rf_e_encToBuf(
343 1.1 oster RF_Raid_t *raidPtr,
344 1.1 oster RF_RowCol_t srcLogicCol,
345 1.1 oster char *srcbuf,
346 1.1 oster RF_RowCol_t destLogicCol,
347 1.1 oster char *destbuf,
348 1.1 oster int numSector)
349 1.1 oster {
350 1.1 oster int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
351 1.1 oster
352 1.1 oster for (i=0; i < numSector; i++)
353 1.1 oster {
354 1.1 oster rf_e_EncOneSect( srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
355 1.1 oster srcbuf += bytesPerSector;
356 1.1 oster destbuf += bytesPerSector;
357 1.1 oster }
358 1.1 oster }
359 1.1 oster
360 1.1 oster /**************************************************************************************
361 1.1 oster * when parity die and one data die, We use second redundant information, 'E',
362 1.1 oster * to recover the data in dead disk. This function is used in the recovery node of
363 1.1 oster * for EO_110_CreateReadDAG
364 1.1 oster **************************************************************************************/
365 1.1 oster int rf_RecoveryEFunc(node)
366 1.1 oster RF_DagNode_t *node;
367 1.1 oster {
368 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
369 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
370 1.1 oster RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
371 1.1 oster RF_RowCol_t scol, /*source logical column*/
372 1.1 oster fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */
373 1.1 oster int i;
374 1.1 oster RF_PhysDiskAddr_t *pda;
375 1.1 oster int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
376 1.1 oster char *srcbuf, *destbuf;
377 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
378 1.1 oster RF_Etimer_t timer;
379 1.1 oster
380 1.1 oster bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector));
381 1.1 oster if (node->dagHdr->status == rf_enable) {
382 1.1 oster RF_ETIMER_START(timer);
383 1.1 oster for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) {
384 1.1 oster pda = (RF_PhysDiskAddr_t *) node->params[i].p;
385 1.1 oster if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */
386 1.1 oster else scol = rf_EUCol(layoutPtr, pda->raidAddress );
387 1.1 oster srcbuf = (char *) node->params[i+1].p;
388 1.1 oster suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
389 1.1 oster destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
390 1.1 oster rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
391 1.1 oster }
392 1.1 oster RF_ETIMER_STOP(timer);
393 1.1 oster RF_ETIMER_EVAL(timer);
394 1.1 oster tracerec->xor_us += RF_ETIMER_VAL_US(timer);
395 1.1 oster }
396 1.1 oster return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
397 1.1 oster }
398 1.1 oster
399 1.1 oster /**************************************************************************************
400 1.1 oster * This function is used in the case where one data and the parity have filed.
401 1.1 oster * (in EO_110_CreateWriteDAG )
402 1.1 oster **************************************************************************************/
403 1.1 oster int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
404 1.1 oster {
405 1.1 oster rf_DegrESubroutine(node, node->results[0]);
406 1.1 oster rf_GenericWakeupFunc(node, 0);
407 1.1 oster #if 1
408 1.1 oster return(0); /* XXX Yet another one!! GO */
409 1.1 oster #endif
410 1.1 oster }
411 1.1 oster
412 1.1 oster
413 1.1 oster
414 1.1 oster /**************************************************************************************
415 1.1 oster * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
416 1.1 oster **************************************************************************************/
417 1.1 oster
418 1.1 oster void rf_doubleEOdecode(
419 1.1 oster RF_Raid_t *raidPtr,
420 1.1 oster char **rrdbuf,
421 1.1 oster char **dest,
422 1.1 oster RF_RowCol_t *fcol,
423 1.1 oster char *pbuf,
424 1.1 oster char *ebuf)
425 1.1 oster {
426 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
427 1.1 oster int i, j, k, f1, f2, row;
428 1.1 oster int rrdrow, erow, count = 0;
429 1.1 oster int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
430 1.1 oster int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
431 1.1 oster #if 0
432 1.1 oster int pcol = (RF_EO_MATRIX_DIM) - 1;
433 1.1 oster #endif
434 1.1 oster int ecol = (RF_EO_MATRIX_DIM) - 2;
435 1.1 oster int bytesPerEU = bytesPerSector/numRowInEncMatix;
436 1.1 oster int numDataCol = layoutPtr->numDataCol;
437 1.1 oster #if RF_EO_MATRIX_DIM > 17
438 1.1 oster int shortsPerEU = bytesPerEU/sizeof(short);
439 1.1 oster short *rrdbuf_current, *pbuf_current, *ebuf_current;
440 1.1 oster short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
441 1.1 oster register short *temp;
442 1.1 oster short *P;
443 1.1 oster
444 1.1 oster RF_ASSERT( bytesPerEU % sizeof(short) == 0);
445 1.1 oster RF_Malloc(P, bytesPerEU, (short *));
446 1.1 oster RF_Malloc(temp, bytesPerEU, (short *));
447 1.1 oster #elif RF_EO_MATRIX_DIM == 17
448 1.1 oster int longsPerEU = bytesPerEU/sizeof(long);
449 1.1 oster long *rrdbuf_current, *pbuf_current, *ebuf_current;
450 1.1 oster long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
451 1.1 oster register long *temp;
452 1.1 oster long *P;
453 1.1 oster
454 1.1 oster RF_ASSERT( bytesPerEU % sizeof(long) == 0);
455 1.1 oster RF_Malloc(P, bytesPerEU, (long *));
456 1.1 oster RF_Malloc(temp, bytesPerEU, (long *));
457 1.1 oster #endif
458 1.1 oster RF_ASSERT( *((long *)dest[0]) == 0);
459 1.1 oster RF_ASSERT( *((long *)dest[1]) == 0);
460 1.1 oster bzero((char *)P, bytesPerEU);
461 1.1 oster bzero((char *)temp, bytesPerEU);
462 1.1 oster RF_ASSERT( *P == 0 );
463 1.1 oster /* calculate the 'P' parameter, which, not parity, is the Xor of all elements in
464 1.1 oster the last two column, ie. 'E' and 'parity' colume, see the Ref. paper by Blaum, et al 1993 */
465 1.1 oster for( i=0; i< numRowInEncMatix; i++)
466 1.1 oster for( k=0; k< longsPerEU; k++) {
467 1.1 oster #if RF_EO_MATRIX_DIM > 17
468 1.1 oster ebuf_current = ((short *)ebuf) + i*shortsPerEU + k;
469 1.1 oster pbuf_current = ((short *)pbuf) + i*shortsPerEU + k;
470 1.1 oster #elif RF_EO_MATRIX_DIM == 17
471 1.1 oster ebuf_current = ((long *)ebuf) + i*longsPerEU + k;
472 1.1 oster pbuf_current = ((long *)pbuf) + i*longsPerEU + k;
473 1.1 oster #endif
474 1.1 oster P[k] ^= *ebuf_current;
475 1.1 oster P[k] ^= *pbuf_current;
476 1.1 oster }
477 1.1 oster RF_ASSERT( fcol[0] != fcol[1] );
478 1.1 oster if( fcol[0] < fcol[1] ) {
479 1.1 oster #if RF_EO_MATRIX_DIM > 17
480 1.1 oster dest_smaller = (short *)(dest[0]);
481 1.1 oster dest_larger = (short *)(dest[1]);
482 1.1 oster #elif RF_EO_MATRIX_DIM == 17
483 1.1 oster dest_smaller = (long *)(dest[0]);
484 1.1 oster dest_larger = (long *)(dest[1]);
485 1.1 oster #endif
486 1.1 oster f1 = fcol[0];
487 1.1 oster f2 = fcol[1];
488 1.1 oster }
489 1.1 oster else {
490 1.1 oster #if RF_EO_MATRIX_DIM > 17
491 1.1 oster dest_smaller = (short *)(dest[1]);
492 1.1 oster dest_larger = (short *)(dest[0]);
493 1.1 oster #elif RF_EO_MATRIX_DIM == 17
494 1.1 oster dest_smaller = (long *)(dest[1]);
495 1.1 oster dest_larger = (long *)(dest[0]);
496 1.1 oster #endif
497 1.1 oster f1 = fcol[1];
498 1.1 oster f2 = fcol[0];
499 1.1 oster }
500 1.1 oster row = (RF_EO_MATRIX_DIM) -1;
501 1.1 oster while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) )
502 1.1 oster {
503 1.1 oster #if RF_EO_MATRIX_DIM > 17
504 1.1 oster dest_larger_current = dest_larger + row*shortsPerEU;
505 1.1 oster dest_smaller_current = dest_smaller + row*shortsPerEU;
506 1.1 oster #elif RF_EO_MATRIX_DIM == 17
507 1.1 oster dest_larger_current = dest_larger + row*longsPerEU;
508 1.1 oster dest_smaller_current = dest_smaller + row*longsPerEU;
509 1.1 oster #endif
510 1.1 oster /** Do the diagonal recovery. Initially, temp[k] = (failed 1),
511 1.1 oster which is the failed data in the colume which has smaller col index. **/
512 1.1 oster /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
513 1.1 oster for( j=0; j< numDataCol; j++)
514 1.1 oster {
515 1.1 oster if( j == f1 || j == f2 ) continue;
516 1.1 oster rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM );
517 1.1 oster if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) {
518 1.1 oster #if RF_EO_MATRIX_DIM > 17
519 1.1 oster rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU;
520 1.1 oster for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
521 1.1 oster #elif RF_EO_MATRIX_DIM == 17
522 1.1 oster rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU;
523 1.1 oster for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
524 1.1 oster #endif
525 1.1 oster }
526 1.1 oster }
527 1.1 oster /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't Xor into it
528 1.1 oster E(erow,m-2) = (principle diagonal) ^ (failed 1) ^ (failed 2)
529 1.1 oster ^ ( SUM of nonfailed in-diagonal A(rrdrow,0..m-3) )
530 1.1 oster After this step, temp[k] = (principle diagonal) ^ (failed 2) */
531 1.1 oster
532 1.1 oster erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) );
533 1.1 oster if ( erow != (RF_EO_MATRIX_DIM) -1) {
534 1.1 oster #if RF_EO_MATRIX_DIM > 17
535 1.1 oster ebuf_current = (short *)ebuf + shortsPerEU * erow;
536 1.1 oster for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k);
537 1.1 oster #elif RF_EO_MATRIX_DIM == 17
538 1.1 oster ebuf_current = (long *)ebuf + longsPerEU * erow;
539 1.1 oster for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k);
540 1.1 oster #endif
541 1.1 oster }
542 1.1 oster /* step 3: ^P to obtain the failed data (failed 2).
543 1.1 oster P can be proved to be actually (principle diagonal)
544 1.1 oster After this step, temp[k] = (failed 2), the failed data to be recovered */
545 1.1 oster #if RF_EO_MATRIX_DIM > 17
546 1.1 oster for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k];
547 1.1 oster /* Put the data to the destination buffer */
548 1.1 oster for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k];
549 1.1 oster #elif RF_EO_MATRIX_DIM == 17
550 1.1 oster for (k=0; k< longsPerEU; k++) temp[k] ^= P[k];
551 1.1 oster /* Put the data to the destination buffer */
552 1.1 oster for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k];
553 1.1 oster #endif
554 1.1 oster
555 1.1 oster /** THE FOLLOWING DO THE HORIZONTAL XOR **/
556 1.1 oster /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data columes */
557 1.1 oster for (j=0; j< numDataCol; j++)
558 1.1 oster {
559 1.1 oster if( j == f1 || j == f2 ) continue;
560 1.1 oster #if RF_EO_MATRIX_DIM > 17
561 1.1 oster rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU;
562 1.1 oster for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
563 1.1 oster #elif RF_EO_MATRIX_DIM == 17
564 1.1 oster rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU;
565 1.1 oster for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
566 1.1 oster #endif
567 1.1 oster }
568 1.1 oster /* step 2: ^A(row,m-1) */
569 1.1 oster /* step 3: Put the data to the destination buffer */
570 1.1 oster #if RF_EO_MATRIX_DIM > 17
571 1.1 oster pbuf_current = (short *)pbuf + shortsPerEU * row;
572 1.1 oster for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k);
573 1.1 oster for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k];
574 1.1 oster #elif RF_EO_MATRIX_DIM == 17
575 1.1 oster pbuf_current = (long *)pbuf + longsPerEU * row;
576 1.1 oster for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k);
577 1.1 oster for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k];
578 1.1 oster #endif
579 1.1 oster count++;
580 1.1 oster }
581 1.1 oster /* Check if all Encoding Unit in the data buffer have been decoded,
582 1.1 oster according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
583 1.1 oster this algorithm will covered all buffer */
584 1.1 oster RF_ASSERT( count == numRowInEncMatix );
585 1.1 oster RF_Free((char *)P, bytesPerEU);
586 1.1 oster RF_Free((char *)temp, bytesPerEU);
587 1.1 oster }
588 1.1 oster
589 1.1 oster
590 1.1 oster /***************************************************************************************
591 1.1 oster * This function is called by double degraded read
592 1.1 oster * EO_200_CreateReadDAG
593 1.1 oster *
594 1.1 oster ***************************************************************************************/
595 1.1 oster int rf_EvenOddDoubleRecoveryFunc(node)
596 1.1 oster RF_DagNode_t *node;
597 1.1 oster {
598 1.1 oster int ndataParam = 0;
599 1.1 oster int np = node->numParams;
600 1.1 oster RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
601 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
602 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
603 1.1 oster int i, prm, sector, nresults = node->numResults;
604 1.1 oster RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
605 1.1 oster unsigned sosAddr;
606 1.1 oster int two = 0, mallc_one= 0, mallc_two = 0; /* flags to indicate if memory is allocated */
607 1.1 oster int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
608 1.1 oster RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda;
609 1.1 oster RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
610 1.1 oster char **buf, *ebuf, *pbuf, *dest[2];
611 1.1 oster long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff;
612 1.1 oster RF_SectorNum_t startSector, endSector;
613 1.1 oster RF_Etimer_t timer;
614 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
615 1.1 oster
616 1.1 oster RF_ETIMER_START(timer);
617 1.1 oster
618 1.1 oster /* Find out the number of parameters which are pdas for data information */
619 1.1 oster for (i = 0; i<= np; i++)
620 1.1 oster if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; }
621 1.1 oster
622 1.1 oster RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
623 1.1 oster if (ndataParam != 0 ){
624 1.1 oster RF_Malloc(suoff, ndataParam*sizeof(long), (long *) );
625 1.1 oster RF_Malloc(suend, ndataParam*sizeof(long), (long *) );
626 1.1 oster RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) );
627 1.1 oster }
628 1.1 oster
629 1.1 oster if (asmap->failedPDAs[1] &&
630 1.1 oster (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
631 1.1 oster RF_ASSERT(0); /* currently, no support for this situation */
632 1.1 oster ppda = node->params[np-6].p;
633 1.1 oster ppda2 = node->params[np-5].p;
634 1.1 oster RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY );
635 1.1 oster epda = node->params[np-4].p;
636 1.1 oster epda2 = node->params[np-3].p;
637 1.1 oster RF_ASSERT( epda2->type == RF_PDA_TYPE_Q );
638 1.1 oster two = 1;
639 1.1 oster }
640 1.1 oster else {
641 1.1 oster ppda = node->params[np-4].p;
642 1.1 oster epda = node->params[np-3].p;
643 1.1 oster psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
644 1.1 oster esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
645 1.1 oster RF_ASSERT( psuoff == esuoff );
646 1.1 oster }
647 1.1 oster /*
648 1.1 oster the followings have three goals:
649 1.1 oster 1. determine the startSector to begin decoding and endSector to end decoding.
650 1.1 oster 2. determine the colume numbers of the two failed disks.
651 1.1 oster 3. determine the offset and end offset of the access within each failed stripe unit.
652 1.1 oster */
653 1.1 oster if( nresults == 1 ) {
654 1.1 oster /* find the startSector to begin decoding */
655 1.1 oster pda = node->results[0];
656 1.1 oster bzero(pda->bufPtr, bytesPerSector*pda->numSector );
657 1.1 oster fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector );
658 1.1 oster fsuend[0] = fsuoff[0] + pda->numSector;
659 1.1 oster startSector = fsuoff[0];
660 1.1 oster endSector = fsuend[0];
661 1.1 oster
662 1.1 oster /* find out the the column of failed disk being accessed */
663 1.1 oster fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress );
664 1.1 oster
665 1.1 oster /* find out the other failed colume not accessed */
666 1.1 oster sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
667 1.1 oster for (i=0; i < numDataCol; i++) {
668 1.1 oster npda.raidAddress = sosAddr + (i * secPerSU);
669 1.1 oster (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
670 1.1 oster /* skip over dead disks */
671 1.1 oster if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
672 1.1 oster if (i != fcol[0]) break;
673 1.1 oster }
674 1.1 oster RF_ASSERT (i < numDataCol);
675 1.1 oster fcol[1] = i;
676 1.1 oster }
677 1.1 oster else {
678 1.1 oster RF_ASSERT ( nresults == 2 );
679 1.1 oster pda0 = node->results[0]; bzero(pda0->bufPtr, bytesPerSector*pda0->numSector );
680 1.1 oster pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector*pda1->numSector );
681 1.1 oster /* determine the failed colume numbers of the two failed disks. */
682 1.1 oster fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress );
683 1.1 oster fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress );
684 1.1 oster /* determine the offset and end offset of the access within each failed stripe unit. */
685 1.1 oster fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector );
686 1.1 oster fsuend[0] = fsuoff[0] + pda0->numSector;
687 1.1 oster fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector );
688 1.1 oster fsuend[1] = fsuoff[1] + pda1->numSector;
689 1.1 oster /* determine the startSector to begin decoding */
690 1.1 oster startSector = RF_MIN( pda0->startSector, pda1->startSector );
691 1.1 oster /* determine the endSector to end decoding */
692 1.1 oster endSector = RF_MAX( fsuend[0], fsuend[1] );
693 1.1 oster }
694 1.1 oster /*
695 1.1 oster assign the beginning sector and the end sector for each parameter
696 1.1 oster find out the corresponding colume # for each parameter
697 1.1 oster */
698 1.1 oster for( prm=0; prm < ndataParam; prm++ ) {
699 1.1 oster pda = node->params[prm].p;
700 1.1 oster suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
701 1.1 oster suend[prm] = suoff[prm] + pda->numSector;
702 1.1 oster prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress );
703 1.1 oster }
704 1.1 oster /* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU,
705 1.1 oster find out the corresponding parameters that cover the current sector and that are needed for
706 1.1 oster decoding of this sector in failed SU. 2. Find out if sector is in the shadow of any accessed
707 1.1 oster failed SU. If not, malloc a temporary space of a sector in size.
708 1.1 oster */
709 1.1 oster for( sector = startSector; sector < endSector; sector++ ){
710 1.1 oster if ( nresults == 2 )
711 1.1 oster if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue;
712 1.1 oster for( prm=0; prm < ndataParam; prm++ )
713 1.1 oster if( suoff[prm] <= sector && sector < suend[prm] )
714 1.1 oster buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr +
715 1.1 oster rf_RaidAddressToByte(raidPtr, sector-suoff[prm]);
716 1.1 oster /* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point
717 1.1 oster at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of
718 1.1 oster a sector in size for destination of decoding.
719 1.1 oster */
720 1.1 oster RF_ASSERT( nresults == 1 || nresults == 2 );
721 1.1 oster if ( nresults == 1) {
722 1.1 oster dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
723 1.1 oster /* Always malloc temp buffer to dest[1] */
724 1.1 oster RF_Malloc( dest[1], bytesPerSector, (char *) );
725 1.1 oster bzero(dest[1],bytesPerSector); mallc_two = 1; }
726 1.1 oster else {
727 1.1 oster if( fsuoff[0] <= sector && sector < fsuend[0] )
728 1.1 oster dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
729 1.1 oster else { RF_Malloc( dest[0], bytesPerSector, (char *) );
730 1.1 oster bzero(dest[0],bytesPerSector); mallc_one = 1; }
731 1.1 oster if( fsuoff[1] <= sector && sector < fsuend[1] )
732 1.1 oster dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]);
733 1.1 oster else { RF_Malloc( dest[1], bytesPerSector, (char *) );
734 1.1 oster bzero(dest[1],bytesPerSector); mallc_two = 1; }
735 1.1 oster RF_ASSERT( mallc_one == 0 || mallc_two == 0 );
736 1.1 oster }
737 1.1 oster pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff );
738 1.1 oster ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff );
739 1.1 oster /*
740 1.1 oster * After finish finding all needed sectors, call doubleEOdecode function for decoding
741 1.1 oster * one sector to destination.
742 1.1 oster */
743 1.1 oster rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
744 1.1 oster /* free all allocated memory, and mark flag to indicate no memory is being allocated */
745 1.1 oster if( mallc_one == 1) RF_Free( dest[0], bytesPerSector );
746 1.1 oster if( mallc_two == 1) RF_Free( dest[1], bytesPerSector );
747 1.1 oster mallc_one = mallc_two = 0;
748 1.1 oster }
749 1.1 oster RF_Free(buf, numDataCol*sizeof(char *));
750 1.1 oster if (ndataParam != 0){
751 1.1 oster RF_Free(suoff, ndataParam*sizeof(long));
752 1.1 oster RF_Free(suend, ndataParam*sizeof(long));
753 1.1 oster RF_Free(prmToCol, ndataParam*sizeof(long));
754 1.1 oster }
755 1.1 oster
756 1.1 oster RF_ETIMER_STOP(timer);
757 1.1 oster RF_ETIMER_EVAL(timer);
758 1.1 oster if (tracerec) {
759 1.1 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
760 1.1 oster }
761 1.1 oster rf_GenericWakeupFunc(node,0);
762 1.1 oster #if 1
763 1.1 oster return(0); /* XXX is this even close!!?!?!!? GO */
764 1.1 oster #endif
765 1.1 oster }
766 1.1 oster
767 1.1 oster
768 1.1 oster /* currently, only access of one of the two failed SU is allowed in this function.
769 1.1 oster * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
770 1.1 oster * many accesses of single stripe unit.
771 1.1 oster */
772 1.1 oster
773 1.1 oster int rf_EOWriteDoubleRecoveryFunc(node)
774 1.1 oster RF_DagNode_t *node;
775 1.1 oster {
776 1.1 oster int np = node->numParams;
777 1.1 oster RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
778 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
779 1.1 oster RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
780 1.1 oster RF_SectorNum_t sector;
781 1.1 oster RF_RowCol_t col, scol;
782 1.1 oster int prm, i, j;
783 1.1 oster RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
784 1.1 oster unsigned sosAddr;
785 1.1 oster unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
786 1.1 oster RF_int64 numbytes;
787 1.1 oster RF_SectorNum_t startSector, endSector;
788 1.1 oster RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda;
789 1.1 oster RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
790 1.1 oster char **buf; /* buf[0], buf[1], buf[2], ...etc. point to buffer storing data read from col0, col1, col2 */
791 1.1 oster char *ebuf, *pbuf, *dest[2], *olddata[2];
792 1.1 oster RF_Etimer_t timer;
793 1.1 oster RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
794 1.1 oster
795 1.1 oster RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */
796 1.1 oster RF_ETIMER_START(timer);
797 1.1 oster RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
798 1.1 oster
799 1.1 oster ppda = node->results[0]; /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda */
800 1.1 oster epda = node->results[1];
801 1.1 oster fpda = asmap->failedPDAs[0];
802 1.1 oster
803 1.1 oster /* First, recovery the failed old SU using EvenOdd double decoding */
804 1.1 oster /* determine the startSector and endSector for decoding */
805 1.1 oster startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector );
806 1.1 oster endSector = startSector + fpda->numSector;
807 1.1 oster /* Assign buf[col] pointers to point to each non-failed colume and initialize the pbuf
808 1.1 oster and ebuf to point at the beginning of each source buffers and destination buffers */
809 1.1 oster for( prm=0; prm < numDataCol-2; prm++ ) {
810 1.1 oster pda = (RF_PhysDiskAddr_t *)node->params[prm].p;
811 1.1 oster col = rf_EUCol(layoutPtr, pda->raidAddress );
812 1.1 oster buf[col] = pda->bufPtr;
813 1.1 oster }
814 1.1 oster /* pbuf and ebuf: they will change values as double recovery decoding goes on */
815 1.1 oster pbuf = ppda->bufPtr;
816 1.1 oster ebuf = epda->bufPtr;
817 1.1 oster /* find out the logical colume numbers in the encoding matrix of the two failed columes */
818 1.1 oster fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress );
819 1.1 oster
820 1.1 oster /* find out the other failed colume not accessed this time */
821 1.1 oster sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
822 1.1 oster for (i=0; i < numDataCol; i++) {
823 1.1 oster npda.raidAddress = sosAddr + (i * secPerSU);
824 1.1 oster (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
825 1.1 oster /* skip over dead disks */
826 1.1 oster if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
827 1.1 oster if (i != fcol[0]) break;
828 1.1 oster }
829 1.1 oster RF_ASSERT (i < numDataCol);
830 1.1 oster fcol[1] = i;
831 1.1 oster /* assign temporary space to put recovered failed SU */
832 1.1 oster numbytes = fpda->numSector * bytesPerSector;
833 1.1 oster RF_Malloc(olddata[0], numbytes, (char *) );
834 1.1 oster RF_Malloc(olddata[1], numbytes, (char *) );
835 1.1 oster dest[0] = olddata[0];
836 1.1 oster dest[1] = olddata[1];
837 1.1 oster bzero(olddata[0], numbytes);
838 1.1 oster bzero(olddata[1], numbytes);
839 1.1 oster /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] have already
840 1.1 oster pointed at the beginning of each source buffers and destination buffers */
841 1.1 oster for( sector = startSector, i=0; sector < endSector; sector++ , i++){
842 1.1 oster rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
843 1.1 oster for (j=0; j < numDataCol; j++)
844 1.1 oster if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector;
845 1.1 oster dest[0] += bytesPerSector;
846 1.1 oster dest[1] += bytesPerSector;
847 1.1 oster ebuf += bytesPerSector;
848 1.1 oster pbuf += bytesPerSector;
849 1.1 oster }
850 1.1 oster /* after recovery, the buffer pointed by olddata[0] is the old failed data.
851 1.1 oster With new writing data and this old data, use small write to calculate
852 1.1 oster the new redundant informations
853 1.1 oster */
854 1.1 oster /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd;
855 1.1 oster params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ;
856 1.1 oster params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1]
857 1.1 oster are Pdas of wudNodes;
858 1.1 oster For current implementation, we assume the simplest case:
859 1.1 oster asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1
860 1.1 oster then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data
861 1.1 oster into the old recovered data, then do the same things as small write.
862 1.1 oster */
863 1.1 oster
864 1.1 oster rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
865 1.1 oster /* do new 'E' calculation */
866 1.1 oster /* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
867 1.1 oster scol = rf_EUCol(layoutPtr, fpda->raidAddress );
868 1.1 oster /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer */
869 1.1 oster rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
870 1.1 oster
871 1.1 oster /* do new 'P' calculation */
872 1.1 oster rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
873 1.1 oster /* Free the allocated buffer */
874 1.1 oster RF_Free( olddata[0], numbytes );
875 1.1 oster RF_Free( olddata[1], numbytes );
876 1.1 oster RF_Free( buf, numDataCol*sizeof(char *));
877 1.1 oster
878 1.1 oster RF_ETIMER_STOP(timer);
879 1.1 oster RF_ETIMER_EVAL(timer);
880 1.1 oster if (tracerec) {
881 1.1 oster tracerec->q_us += RF_ETIMER_VAL_US(timer);
882 1.1 oster }
883 1.1 oster
884 1.1 oster rf_GenericWakeupFunc(node,0);
885 1.1 oster return(0);
886 1.1 oster }
887