/*	$NetBSD: rf_reconbuffer.c,v 1.6 2001/11/13 07:11:16 lukem Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
28
29 /***************************************************
30 *
31 * rf_reconbuffer.c -- reconstruction buffer manager
32 *
33 ***************************************************/
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.6 2001/11/13 07:11:16 lukem Exp $");
37
38 #include "rf_raid.h"
39 #include "rf_reconbuffer.h"
40 #include "rf_acctrace.h"
41 #include "rf_etimer.h"
42 #include "rf_general.h"
43 #include "rf_debugprint.h"
44 #include "rf_revent.h"
45 #include "rf_reconutil.h"
46 #include "rf_nwayxor.h"
47
/*
 * Debug printfs, gated on the rf_reconbufferDebug flag.  Each macro is
 * wrapped in do { ... } while (0) so it expands to exactly one statement:
 * the previous bare-if form created a dangling-else hazard when used as
 * the body of an unbraced if/else.
 */
#define Dprintf1(s,a)         do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b)       do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c)     do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d)   do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)
53
54 /*****************************************************************************
55 *
56 * Submit a reconstruction buffer to the manager for XOR. We can only
57 * submit a buffer if (1) we can xor into an existing buffer, which
58 * means we don't have to acquire a new one, (2) we can acquire a
59 * floating recon buffer, or (3) the caller has indicated that we are
60 * allowed to keep the submitted buffer.
61 *
62 * Returns non-zero if and only if we were not able to submit.
63 * In this case, we append the current disk ID to the wait list on the
64 * indicated RU, so that it will be re-enabled when we acquire a buffer
65 * for this RU.
66 *
67 ****************************************************************************/
68
/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.  Slot 0 is NULL (a 0-way XOR is
 * meaningless); rf_MultiWayReconXor() indexes this table directly by
 * its source-buffer count.  NOTE(review): the table tops out at 9
 * inputs -- see the XXX in rf_MultiWayReconXor about larger counts.
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};
85
86 int
87 rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
88 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
89 int keep_it; /* whether we can keep this buffer or we have
90 * to return it */
91 int use_committed; /* whether to use a committed or an available
92 * recon buffer */
93 {
94 RF_LayoutSW_t *lp;
95 int rc;
96
97 lp = rbuf->raidPtr->Layout.map;
98 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
99 return (rc);
100 }
101
/*
 * Layout-independent recon buffer submission.
 *
 * Acquires the PSS mutex and rb_mutex (both assumed unlocked at entry)
 * and releases them on every return path.  Returns 0 when the buffer
 * was accepted (possibly after triggering an XOR), non-zero when no
 * buffer could be acquired and the submitting disk was appended to the
 * buffer wait list instead.
 */
int
rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	caddr_t ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR.  If
	 * so, there's no need to acquire a floating rbuf.  Before we can do
	 * any XORing, we must have acquired a destination buffer.  If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe.  Otherwise, we have to go acquire a
	 * floating rbuf. */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
		/* time the XOR itself; the elapsed time is accumulated on
		 * the raid and (when the buffer is being returned) in the
		 * per-disk trace record */
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		/* caller relinquished the buffer and the XOR consumed its
		 * contents, so it can be freed here */
		if (keep_it) {
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
		/* if all surviving disks are blocked on buffers and none is
		 * full, no one can make progress: panic rather than hang */
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {		/* might want to maintain head/tail pointers
				 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer */
	if (t != rbuf) {
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		/* hand the submitted data to "t" by swapping data pointers
		 * instead of copying the payload */
		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer. subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
											 * G=2 */

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}
272
/*
 * XOR all accumulated source buffers for this parity stripe into the
 * destination buffer (pssPtr->rbuf) in a single multi-way pass, then
 * release every source buffer except the last one, which still belongs
 * to the disk whose submission triggered the XOR.  Called from
 * rf_SubmitReconBufferBasic with the rb_mutex and PSS mutex held.
 * Always returns 0.
 */
int
rf_MultiWayReconXor(raidPtr, pssPtr)
	RF_Raid_t *raidPtr;
	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
						 * parity stripe */
{
	int     i, numBufs = pssPtr->xorBufCount;
	/* size of one reconstruction unit, in bytes */
	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#ifndef __NetBSD__
	thread_block();		/* yield the processor before doing a big XOR */
#endif
#endif				/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs?
	 *
	 * NOTE(review): nWayXorFuncs has entries for 1..9 inputs only;
	 * nothing here bounds numBufs below 10 except the RF_PS_MAX_BUFS
	 * assertion above -- confirm RF_PS_MAX_BUFS <= 10.
	 */
	/* the XOR functions operate in units of long, hence the division */
	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));

	/* release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	/* fold the consumed sources into the destination's fill count and
	 * reset the accumulation array for the next round */
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}
313 /* removes one full buffer from one of the full-buffer lists and returns it.
314 *
315 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
316 */
317 RF_ReconBuffer_t *
318 rf_GetFullReconBuffer(reconCtrlPtr)
319 RF_ReconCtrl_t *reconCtrlPtr;
320 {
321 RF_ReconBuffer_t *p;
322
323 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
324
325 if ((p = reconCtrlPtr->priorityList) != NULL) {
326 reconCtrlPtr->priorityList = p->next;
327 p->next = NULL;
328 goto out;
329 }
330 if ((p = reconCtrlPtr->fullBufferList) != NULL) {
331 reconCtrlPtr->fullBufferList = p->next;
332 p->next = NULL;
333 goto out;
334 }
335 out:
336 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
337 return (p);
338 }
339
340
/* if the reconstruction buffer is full, move it to the full list,
 * which is maintained sorted by failed disk sector offset
 *
 * A buffer is "full" once its fill count reaches the number of data
 * columns.  A full buffer is detached from the pss (pssPtr->rbuf is
 * cleared) and a BUFREADY recon event is posted.  Always returns 0.
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */
int
rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
	RF_Raid_t *raidPtr;
	RF_ReconCtrl_t *reconCtrl;
	RF_ReconParityStripeStatus_t *pssPtr;
	int     numDataCol;
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		/* insert in offset-sorted position: either at the head... */
		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			/* ...or after the last entry with a smaller offset
			 * (pt trails p during the scan) */
			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
#if 0
		pssPtr->writeRbuf = pssPtr->rbuf;	/* DEBUG ONLY:  we like
							 * to be able to find
							 * this rbuf while it's
							 * awaiting write */
#else
		/* back-pointer so the writer can find the pss again */
		rbuf->pssPtr = pssPtr;
#endif
		pssPtr->rbuf = NULL;
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
	}
	return (0);
}
383
384
385 /* release a floating recon buffer for someone else to use.
386 * assumes the rb_mutex is LOCKED at entry
387 */
388 void
389 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
390 RF_Raid_t *raidPtr;
391 RF_RowCol_t row;
392 RF_ReconBuffer_t *rbuf;
393 {
394 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
395 RF_CallbackDesc_t *cb;
396
397 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
398 (long) rbuf->parityStripeID, rbuf->which_ru);
399
400 /* if anyone is waiting on buffers, wake one of them up. They will
401 * subsequently wake up anyone else waiting on their RU */
402 if (rcPtr->bufferWaitList) {
403 rbuf->next = rcPtr->committedRbufs;
404 rcPtr->committedRbufs = rbuf;
405 cb = rcPtr->bufferWaitList;
406 rcPtr->bufferWaitList = cb->next;
407 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've
408 * committed a buffer */
409 rf_FreeCallbackDesc(cb);
410 raidPtr->procsInBufWait--;
411 } else {
412 rbuf->next = rcPtr->floatingRbufs;
413 rcPtr->floatingRbufs = rbuf;
414 }
415 }
416 /* release any disk that is waiting on a buffer for the indicated RU.
417 * assumes the rb_mutex is LOCKED at entry
418 */
419 void
420 rf_ReleaseBufferWaiters(raidPtr, pssPtr)
421 RF_Raid_t *raidPtr;
422 RF_ReconParityStripeStatus_t *pssPtr;
423 {
424 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
425
426 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
427 (long) pssPtr->parityStripeID, pssPtr->which_ru);
428 pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
429 while (cb) {
430 cb1 = cb->next;
431 cb->next = NULL;
432 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't
433 * committed a buffer */
434 rf_FreeCallbackDesc(cb);
435 cb = cb1;
436 }
437 pssPtr->bufWaitList = NULL;
438 }
439 /* when reconstruction is forced on an RU, there may be some disks waiting to
440 * acquire a buffer for that RU. Since we allocate a new buffer as part of
441 * the forced-reconstruction process, we no longer have to wait for any
442 * buffers, so we wakeup any waiter that we find in the bufferWaitList
443 *
444 * assumes the rb_mutex is LOCKED at entry
445 */
446 void
447 rf_ReleaseBufferWaiter(rcPtr, rbuf)
448 RF_ReconCtrl_t *rcPtr;
449 RF_ReconBuffer_t *rbuf;
450 {
451 RF_CallbackDesc_t *cb, *cbt;
452
453 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
454 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
455 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
456 if (cbt)
457 cbt->next = cb->next;
458 else
459 rcPtr->bufferWaitList = cb->next;
460 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no
461 * committed buffer */
462 rf_FreeCallbackDesc(cb);
463 return;
464 }
465 }
466 }
467