rf_reconutil.c revision 1.1 1 1.1 oster /* $NetBSD: rf_reconutil.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /********************************************
30 1.1 oster * rf_reconutil.c -- reconstruction utilities
31 1.1 oster ********************************************/
32 1.1 oster
33 1.1 oster /* :
34 1.1 oster * Log: rf_reconutil.c,v
35 1.1 oster * Revision 1.32 1996/07/29 14:05:12 jimz
36 1.1 oster * fix numPUs/numRUs confusion (everything is now numRUs)
37 1.1 oster * clean up some commenting, return values
38 1.1 oster *
39 1.1 oster * Revision 1.31 1996/07/15 05:40:41 jimz
40 1.1 oster * some recon datastructure cleanup
41 1.1 oster * better handling of multiple failures
42 1.1 oster * added undocumented double-recon test
43 1.1 oster *
44 1.1 oster * Revision 1.30 1996/07/13 00:00:59 jimz
45 1.1 oster * sanitized generalized reconstruction architecture
46 1.1 oster * cleaned up head sep, rbuf problems
47 1.1 oster *
48 1.1 oster * Revision 1.29 1996/06/19 17:53:48 jimz
49 1.1 oster * move GetNumSparePUs, InstallSpareTable ops into layout switch
50 1.1 oster *
51 1.1 oster * Revision 1.28 1996/06/07 21:33:04 jimz
52 1.1 oster * begin using consistent types for sector numbers,
53 1.1 oster * stripe numbers, row+col numbers, recon unit numbers
54 1.1 oster *
55 1.1 oster * Revision 1.27 1996/06/05 18:06:02 jimz
56 1.1 oster * Major code cleanup. The Great Renaming is now done.
57 1.1 oster * Better modularity. Better typing. Fixed a bunch of
58 1.1 oster * synchronization bugs. Made a lot of global stuff
59 1.1 oster * per-desc or per-array. Removed dead code.
60 1.1 oster *
61 1.1 oster * Revision 1.26 1996/06/03 23:28:26 jimz
62 1.1 oster * more bugfixes
63 1.1 oster * check in tree to sync for IPDS runs with current bugfixes
64 1.1 oster * there still may be a problem with threads in the script test
65 1.1 oster * getting I/Os stuck- not trivially reproducible (runs ~50 times
66 1.1 oster * in a row without getting stuck)
67 1.1 oster *
68 1.1 oster * Revision 1.25 1996/06/02 17:31:48 jimz
69 1.1 oster * Moved a lot of global stuff into array structure, where it belongs.
70 1.1 oster * Fixed up paritylogging, pss modules in this manner. Some general
71 1.1 oster * code cleanup. Removed lots of dead code, some dead files.
72 1.1 oster *
73 1.1 oster * Revision 1.24 1996/05/31 22:26:54 jimz
74 1.1 oster * fix a lot of mapping problems, memory allocation problems
75 1.1 oster * found some weird lock issues, fixed 'em
76 1.1 oster * more code cleanup
77 1.1 oster *
78 1.1 oster * Revision 1.23 1996/05/30 23:22:16 jimz
79 1.1 oster * bugfixes of serialization, timing problems
80 1.1 oster * more cleanup
81 1.1 oster *
82 1.1 oster * Revision 1.22 1996/05/30 11:29:41 jimz
83 1.1 oster * Numerous bug fixes. Stripe lock release code disagreed with the taking code
84 1.1 oster * about when stripes should be locked (I made it consistent: no parity, no lock)
85 1.1 oster * There was a lot of extra serialization of I/Os which I've removed- a lot of
86 1.1 oster * it was to calculate values for the cache code, which is no longer with us.
87 1.1 oster * More types, function, macro cleanup. Added code to properly quiesce the array
88 1.1 oster * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
89 1.1 oster * before. Fixed memory allocation, freeing bugs.
90 1.1 oster *
91 1.1 oster * Revision 1.21 1996/05/27 18:56:37 jimz
92 1.1 oster * more code cleanup
93 1.1 oster * better typing
94 1.1 oster * compiles in all 3 environments
95 1.1 oster *
96 1.1 oster * Revision 1.20 1996/05/23 00:33:23 jimz
97 1.1 oster * code cleanup: move all debug decls to rf_options.c, all extern
98 1.1 oster * debug decls to rf_options.h, all debug vars preceded by rf_
99 1.1 oster *
100 1.1 oster * Revision 1.19 1996/05/20 16:14:55 jimz
101 1.1 oster * switch to rf_{mutex,cond}_{init,destroy}
102 1.1 oster *
103 1.1 oster * Revision 1.18 1996/05/18 19:51:34 jimz
104 1.1 oster * major code cleanup- fix syntax, make some types consistent,
105 1.1 oster * add prototypes, clean out dead code, et cetera
106 1.1 oster *
107 1.1 oster * Revision 1.17 1995/12/12 18:10:06 jimz
108 1.1 oster * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
109 1.1 oster * fix 80-column brain damage in comments
110 1.1 oster *
111 1.1 oster * Revision 1.16 1995/12/06 15:05:31 root
112 1.1 oster * added copyright info
113 1.1 oster *
114 1.1 oster */
115 1.1 oster
116 1.1 oster #include "rf_types.h"
117 1.1 oster #include "rf_raid.h"
118 1.1 oster #include "rf_desc.h"
119 1.1 oster #include "rf_reconutil.h"
120 1.1 oster #include "rf_reconbuffer.h"
121 1.1 oster #include "rf_general.h"
122 1.1 oster #include "rf_decluster.h"
123 1.1 oster #include "rf_raid5_rotatedspare.h"
124 1.1 oster #include "rf_interdecluster.h"
125 1.1 oster #include "rf_chaindecluster.h"
126 1.1 oster
127 1.1 oster /*******************************************************************
128 1.1 oster * allocates/frees the reconstruction control information structures
129 1.1 oster *******************************************************************/
130 1.1 oster RF_ReconCtrl_t *rf_MakeReconControl(reconDesc, frow, fcol, srow, scol)
131 1.1 oster RF_RaidReconDesc_t *reconDesc;
132 1.1 oster RF_RowCol_t frow; /* failed row and column */
133 1.1 oster RF_RowCol_t fcol;
134 1.1 oster RF_RowCol_t srow; /* identifies which spare we're using */
135 1.1 oster RF_RowCol_t scol;
136 1.1 oster {
137 1.1 oster RF_Raid_t *raidPtr = reconDesc->raidPtr;
138 1.1 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
139 1.1 oster RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
140 1.1 oster RF_ReconUnitCount_t numSpareRUs;
141 1.1 oster RF_ReconCtrl_t *reconCtrlPtr;
142 1.1 oster RF_ReconBuffer_t *rbuf;
143 1.1 oster RF_LayoutSW_t *lp;
144 1.1 oster int retcode, rc;
145 1.1 oster RF_RowCol_t i;
146 1.1 oster
147 1.1 oster lp = raidPtr->Layout.map;
148 1.1 oster
149 1.1 oster /* make and zero the global reconstruction structure and the per-disk structure */
150 1.1 oster RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *));
151 1.1 oster RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */
152 1.1 oster reconCtrlPtr->reconDesc = reconDesc;
153 1.1 oster reconCtrlPtr->fcol = fcol;
154 1.1 oster reconCtrlPtr->spareRow = srow;
155 1.1 oster reconCtrlPtr->spareCol = scol;
156 1.1 oster reconCtrlPtr->lastPSID = layoutPtr->numStripe/layoutPtr->SUsPerPU;
157 1.1 oster reconCtrlPtr->percentComplete = 0;
158 1.1 oster
159 1.1 oster /* initialize each per-disk recon information structure */
160 1.1 oster for (i=0; i<raidPtr->numCol; i++) {
161 1.1 oster reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr;
162 1.1 oster reconCtrlPtr->perDiskInfo[i].row = frow;
163 1.1 oster reconCtrlPtr->perDiskInfo[i].col = i;
164 1.1 oster reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if we just finished an RU */
165 1.1 oster reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU-1;
166 1.1 oster }
167 1.1 oster
168 1.1 oster /* Get the number of spare units per disk and the sparemap in case spare is distributed */
169 1.1 oster
170 1.1 oster if (lp->GetNumSpareRUs) {
171 1.1 oster numSpareRUs = lp->GetNumSpareRUs(raidPtr);
172 1.1 oster }
173 1.1 oster else {
174 1.1 oster numSpareRUs = 0;
175 1.1 oster }
176 1.1 oster
177 1.1 oster /*
178 1.1 oster * Not all distributed sparing archs need dynamic mappings
179 1.1 oster */
180 1.1 oster if (lp->InstallSpareTable) {
181 1.1 oster retcode = rf_InstallSpareTable(raidPtr, frow, fcol);
182 1.1 oster if (retcode) {
183 1.1 oster RF_PANIC(); /* XXX fix this*/
184 1.1 oster }
185 1.1 oster }
186 1.1 oster
187 1.1 oster /* make the reconstruction map */
188 1.1 oster reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit),
189 1.1 oster raidPtr->sectorsPerDisk, numSpareRUs);
190 1.1 oster
191 1.1 oster /* make the per-disk reconstruction buffers */
192 1.1 oster for (i=0; i<raidPtr->numCol; i++) {
193 1.1 oster reconCtrlPtr->perDiskInfo[i].rbuf = (i==fcol) ? NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE);
194 1.1 oster }
195 1.1 oster
196 1.1 oster /* initialize the event queue */
197 1.1 oster rc = rf_mutex_init(&reconCtrlPtr->eq_mutex);
198 1.1 oster if (rc) {
199 1.1 oster /* XXX deallocate, cleanup */
200 1.1 oster RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
201 1.1 oster __LINE__, rc);
202 1.1 oster return(NULL);
203 1.1 oster }
204 1.1 oster rc = rf_cond_init(&reconCtrlPtr->eq_cond);
205 1.1 oster if (rc) {
206 1.1 oster /* XXX deallocate, cleanup */
207 1.1 oster RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
208 1.1 oster __LINE__, rc);
209 1.1 oster return(NULL);
210 1.1 oster }
211 1.1 oster reconCtrlPtr->eventQueue = NULL;
212 1.1 oster reconCtrlPtr->eq_count = 0;
213 1.1 oster
214 1.1 oster /* make the floating recon buffers and append them to the free list */
215 1.1 oster rc = rf_mutex_init(&reconCtrlPtr->rb_mutex);
216 1.1 oster if (rc) {
217 1.1 oster /* XXX deallocate, cleanup */
218 1.1 oster RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
219 1.1 oster __LINE__, rc);
220 1.1 oster return(NULL);
221 1.1 oster }
222 1.1 oster reconCtrlPtr->fullBufferList= NULL;
223 1.1 oster reconCtrlPtr->priorityList = NULL;
224 1.1 oster reconCtrlPtr->floatingRbufs = NULL;
225 1.1 oster reconCtrlPtr->committedRbufs= NULL;
226 1.1 oster for (i=0; i<raidPtr->numFloatingReconBufs; i++) {
227 1.1 oster rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING);
228 1.1 oster rbuf->next = reconCtrlPtr->floatingRbufs;
229 1.1 oster reconCtrlPtr->floatingRbufs = rbuf;
230 1.1 oster }
231 1.1 oster
232 1.1 oster /* create the parity stripe status table */
233 1.1 oster reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr);
234 1.1 oster
235 1.1 oster /* set the initial min head sep counter val */
236 1.1 oster reconCtrlPtr->minHeadSepCounter = 0;
237 1.1 oster
238 1.1 oster return(reconCtrlPtr);
239 1.1 oster }
240 1.1 oster
241 1.1 oster void rf_FreeReconControl(raidPtr, row)
242 1.1 oster RF_Raid_t *raidPtr;
243 1.1 oster RF_RowCol_t row;
244 1.1 oster {
245 1.1 oster RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
246 1.1 oster RF_ReconBuffer_t *t;
247 1.1 oster RF_ReconUnitNum_t i;
248 1.1 oster
249 1.1 oster RF_ASSERT(reconCtrlPtr);
250 1.1 oster for (i=0; i<raidPtr->numCol; i++) if (reconCtrlPtr->perDiskInfo[i].rbuf) rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf);
251 1.1 oster for (i=0; i<raidPtr->numFloatingReconBufs; i++) {
252 1.1 oster t = reconCtrlPtr->floatingRbufs;
253 1.1 oster RF_ASSERT(t);
254 1.1 oster reconCtrlPtr->floatingRbufs = t->next;
255 1.1 oster rf_FreeReconBuffer(t);
256 1.1 oster }
257 1.1 oster rf_mutex_destroy(&reconCtrlPtr->rb_mutex);
258 1.1 oster rf_mutex_destroy(&reconCtrlPtr->eq_mutex);
259 1.1 oster rf_cond_destroy(&reconCtrlPtr->eq_cond);
260 1.1 oster rf_FreeReconMap(reconCtrlPtr->reconMap);
261 1.1 oster rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable);
262 1.1 oster RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t));
263 1.1 oster RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr));
264 1.1 oster }
265 1.1 oster
266 1.1 oster
267 1.1 oster /******************************************************************************
268 1.1 oster * computes the default head separation limit
269 1.1 oster *****************************************************************************/
270 1.1 oster RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(raidPtr)
271 1.1 oster RF_Raid_t *raidPtr;
272 1.1 oster {
273 1.1 oster RF_HeadSepLimit_t hsl;
274 1.1 oster RF_LayoutSW_t *lp;
275 1.1 oster
276 1.1 oster lp = raidPtr->Layout.map;
277 1.1 oster if (lp->GetDefaultHeadSepLimit == NULL)
278 1.1 oster return(-1);
279 1.1 oster hsl = lp->GetDefaultHeadSepLimit(raidPtr);
280 1.1 oster return(hsl);
281 1.1 oster }
282 1.1 oster
283 1.1 oster
284 1.1 oster /******************************************************************************
285 1.1 oster * computes the default number of floating recon buffers
286 1.1 oster *****************************************************************************/
287 1.1 oster int rf_GetDefaultNumFloatingReconBuffers(raidPtr)
288 1.1 oster RF_Raid_t *raidPtr;
289 1.1 oster {
290 1.1 oster RF_LayoutSW_t *lp;
291 1.1 oster int nrb;
292 1.1 oster
293 1.1 oster lp = raidPtr->Layout.map;
294 1.1 oster if (lp->GetDefaultNumFloatingReconBuffers == NULL)
295 1.1 oster return(3 * raidPtr->numCol);
296 1.1 oster nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr);
297 1.1 oster return(nrb);
298 1.1 oster }
299 1.1 oster
300 1.1 oster
301 1.1 oster /******************************************************************************
302 1.1 oster * creates and initializes a reconstruction buffer
303 1.1 oster *****************************************************************************/
304 1.1 oster RF_ReconBuffer_t *rf_MakeReconBuffer(
305 1.1 oster RF_Raid_t *raidPtr,
306 1.1 oster RF_RowCol_t row,
307 1.1 oster RF_RowCol_t col,
308 1.1 oster RF_RbufType_t type)
309 1.1 oster {
310 1.1 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
311 1.1 oster RF_ReconBuffer_t *t;
312 1.1 oster u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit);
313 1.1 oster
314 1.1 oster RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *));
315 1.1 oster RF_Malloc(t->buffer, recon_buffer_size, (caddr_t));
316 1.1 oster RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *));
317 1.1 oster t->raidPtr = raidPtr;
318 1.1 oster t->row = row; t->col = col;
319 1.1 oster t->priority = RF_IO_RECON_PRIORITY;
320 1.1 oster t->type = type;
321 1.1 oster t->pssPtr = NULL;
322 1.1 oster t->next = NULL;
323 1.1 oster return(t);
324 1.1 oster }
325 1.1 oster
326 1.1 oster /******************************************************************************
327 1.1 oster * frees a reconstruction buffer
328 1.1 oster *****************************************************************************/
329 1.1 oster void rf_FreeReconBuffer(rbuf)
330 1.1 oster RF_ReconBuffer_t *rbuf;
331 1.1 oster {
332 1.1 oster RF_Raid_t *raidPtr = rbuf->raidPtr;
333 1.1 oster u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit);
334 1.1 oster
335 1.1 oster RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char));
336 1.1 oster RF_Free(rbuf->buffer, recon_buffer_size);
337 1.1 oster RF_Free(rbuf, sizeof(*rbuf));
338 1.1 oster }
339 1.1 oster
340 1.1 oster
341 1.1 oster /******************************************************************************
342 1.1 oster * debug only: sanity check the number of floating recon bufs in use
343 1.1 oster *****************************************************************************/
344 1.1 oster void rf_CheckFloatingRbufCount(raidPtr, dolock)
345 1.1 oster RF_Raid_t *raidPtr;
346 1.1 oster int dolock;
347 1.1 oster {
348 1.1 oster RF_ReconParityStripeStatus_t *p;
349 1.1 oster RF_PSStatusHeader_t *pssTable;
350 1.1 oster RF_ReconBuffer_t *rbuf;
351 1.1 oster int i, j, sum = 0;
352 1.1 oster RF_RowCol_t frow=0;
353 1.1 oster
354 1.1 oster for (i=0; i<raidPtr->numRow; i++)
355 1.1 oster if (raidPtr->reconControl[i]) {
356 1.1 oster frow = i;
357 1.1 oster break;
358 1.1 oster }
359 1.1 oster RF_ASSERT(frow >= 0);
360 1.1 oster
361 1.1 oster if (dolock)
362 1.1 oster RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
363 1.1 oster pssTable = raidPtr->reconControl[frow]->pssTable;
364 1.1 oster
365 1.1 oster for (i=0; i<raidPtr->pssTableSize; i++) {
366 1.1 oster RF_LOCK_MUTEX(pssTable[i].mutex);
367 1.1 oster for (p = pssTable[i].chain; p; p=p->next) {
368 1.1 oster rbuf = (RF_ReconBuffer_t *) p->rbuf;
369 1.1 oster if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING)
370 1.1 oster sum++;
371 1.1 oster
372 1.1 oster rbuf = (RF_ReconBuffer_t *) p->writeRbuf;
373 1.1 oster if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING)
374 1.1 oster sum++;
375 1.1 oster
376 1.1 oster for (j=0; j<p->xorBufCount; j++) {
377 1.1 oster rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j];
378 1.1 oster RF_ASSERT(rbuf);
379 1.1 oster if (rbuf->type == RF_RBUF_TYPE_FLOATING)
380 1.1 oster sum++;
381 1.1 oster }
382 1.1 oster }
383 1.1 oster RF_UNLOCK_MUTEX(pssTable[i].mutex);
384 1.1 oster }
385 1.1 oster
386 1.1 oster for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) {
387 1.1 oster if (rbuf->type == RF_RBUF_TYPE_FLOATING)
388 1.1 oster sum++;
389 1.1 oster }
390 1.1 oster for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) {
391 1.1 oster if (rbuf->type == RF_RBUF_TYPE_FLOATING)
392 1.1 oster sum++;
393 1.1 oster }
394 1.1 oster for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) {
395 1.1 oster if (rbuf->type == RF_RBUF_TYPE_FLOATING)
396 1.1 oster sum++;
397 1.1 oster }
398 1.1 oster for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) {
399 1.1 oster if (rbuf->type == RF_RBUF_TYPE_FLOATING)
400 1.1 oster sum++;
401 1.1 oster }
402 1.1 oster
403 1.1 oster RF_ASSERT(sum == raidPtr->numFloatingReconBufs);
404 1.1 oster
405 1.1 oster if (dolock)
406 1.1 oster RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
407 1.1 oster }
408