rf_reconbuffer.c revision 1.4 1 /* $NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /***************************************************
30 *
31 * rf_reconbuffer.c -- reconstruction buffer manager
32 *
33 ***************************************************/
34
35 #include "rf_raid.h"
36 #include "rf_reconbuffer.h"
37 #include "rf_acctrace.h"
38 #include "rf_etimer.h"
39 #include "rf_general.h"
40 #include "rf_debugprint.h"
41 #include "rf_revent.h"
42 #include "rf_reconutil.h"
43 #include "rf_nwayxor.h"
44
/*
 * Debug tracing helpers.  DprintfN(fmt, a1..aN) forwards to printf() only
 * when the rf_reconbufferDebug flag is non-zero.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement.  A bare "if (flag) printf(...)" expansion would capture a
 * following "else" when the macro is used as the body of an un-braced if.
 */
#define Dprintf1(s,a)         do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b)       do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c)     do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d)   do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)
50
51 /*****************************************************************************************
52 *
53 * Submit a reconstruction buffer to the manager for XOR.
54 * We can only submit a buffer if (1) we can xor into an existing buffer, which means
55 * we don't have to acquire a new one, (2) we can acquire a floating
56 * recon buffer, or (3) the caller has indicated that we are allowed to keep the
57 * submitted buffer.
58 *
59 * Returns non-zero if and only if we were not able to submit.
60 * In this case, we append the current disk ID to the wait list on the indicated
61 * RU, so that it will be re-enabled when we acquire a buffer for this RU.
62 *
63 ****************************************************************************************/
64
/*
 * Readability helper: allocate a callback descriptor with
 * rf_AllocCallbackDesc(), record (_row_, _col_) in it, and push it onto the
 * front of (_pssPtr_)->bufWaitList.  _cb_ must be an assignable pointer
 * variable; it is left pointing at the new descriptor.
 *
 * The body is wrapped in do { } while (0) so that the whole sequence acts
 * as a single statement; invoke it with a trailing semicolon.
 */
#define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_) \
	do { \
		(_cb_) = rf_AllocCallbackDesc(); \
		(_cb_)->row = (_row_); \
		(_cb_)->col = (_col_); \
		(_cb_)->next = (_pssPtr_)->bufWaitList; \
		(_pssPtr_)->bufWaitList = (_cb_); \
	} while (0)
69
70 /*
71 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
72 * bufs into the accumulating sum.
73 */
74 static RF_VoidFuncPtr nWayXorFuncs[] = {
75 NULL,
76 (RF_VoidFuncPtr) rf_nWayXor1,
77 (RF_VoidFuncPtr) rf_nWayXor2,
78 (RF_VoidFuncPtr) rf_nWayXor3,
79 (RF_VoidFuncPtr) rf_nWayXor4,
80 (RF_VoidFuncPtr) rf_nWayXor5,
81 (RF_VoidFuncPtr) rf_nWayXor6,
82 (RF_VoidFuncPtr) rf_nWayXor7,
83 (RF_VoidFuncPtr) rf_nWayXor8,
84 (RF_VoidFuncPtr) rf_nWayXor9
85 };
86
87 int
88 rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
89 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
90 int keep_it; /* whether we can keep this buffer or we have
91 * to return it */
92 int use_committed; /* whether to use a committed or an available
93 * recon buffer */
94 {
95 RF_LayoutSW_t *lp;
96 int rc;
97
98 lp = rbuf->raidPtr->Layout.map;
99 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
100 return (rc);
101 }
102
103 int
104 rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
105 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
106 int keep_it; /* whether we can keep this buffer or we have
107 * to return it */
108 int use_committed; /* whether to use a committed or an available
109 * recon buffer */
110 {
111 RF_Raid_t *raidPtr = rbuf->raidPtr;
112 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
113 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
114 RF_ReconParityStripeStatus_t *pssPtr;
115 RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf
116 * pointers */
117 caddr_t ta; /* temporary data buffer pointer */
118 RF_CallbackDesc_t *cb, *p;
119 int retcode = 0, created = 0;
120
121 RF_Etimer_t timer;
122
123 /* makes no sense to have a submission from the failed disk */
124 RF_ASSERT(rbuf);
125 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
126
127 Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
128 rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
129
130 RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
131
132 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
133
134 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
135 RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten
136 * an rbuf for it */
137
138 /* check to see if enough buffers have accumulated to do an XOR. If
139 * so, there's no need to acquire a floating rbuf. Before we can do
140 * any XORing, we must have acquired a destination buffer. If we
141 * have, then we can go ahead and do the XOR if (1) including this
142 * buffer, enough bufs have accumulated, or (2) this is the last
143 * submission for this stripe. Otherwise, we have to go acquire a
144 * floating rbuf. */
145
146 targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
147 if ((targetRbuf != NULL) &&
148 ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
149 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */
150 Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
151 RF_ETIMER_START(timer);
152 rf_MultiWayReconXor(raidPtr, pssPtr);
153 RF_ETIMER_STOP(timer);
154 RF_ETIMER_EVAL(timer);
155 raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
156 if (!keep_it) {
157 raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
158 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
159 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
160 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
161 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
162 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
163
164 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
165 }
166 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
167
168 /* if use_committed is on, we _must_ consume a buffer off the
169 * committed list. */
170 if (use_committed) {
171 t = reconCtrlPtr->committedRbufs;
172 RF_ASSERT(t);
173 reconCtrlPtr->committedRbufs = t->next;
174 rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
175 }
176 if (keep_it) {
177 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
178 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
179 rf_FreeReconBuffer(rbuf);
180 return (retcode);
181 }
182 goto out;
183 }
184 /* set the value of "t", which we'll use as the rbuf from here on */
185 if (keep_it) {
186 t = rbuf;
187 } else {
188 if (use_committed) { /* if a buffer has been committed to
189 * us, use it */
190 t = reconCtrlPtr->committedRbufs;
191 RF_ASSERT(t);
192 reconCtrlPtr->committedRbufs = t->next;
193 t->next = NULL;
194 } else
195 if (reconCtrlPtr->floatingRbufs) {
196 t = reconCtrlPtr->floatingRbufs;
197 reconCtrlPtr->floatingRbufs = t->next;
198 t->next = NULL;
199 }
200 }
201
202 /* If we weren't able to acquire a buffer, append to the end of the
203 * buf list in the recon ctrl struct. */
204 if (!t) {
205 RF_ASSERT(!keep_it && !use_committed);
206 Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);
207
208 raidPtr->procsInBufWait++;
209 if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
210 printf("Buffer wait deadlock detected. Exiting.\n");
211 rf_PrintPSStatusTable(raidPtr, rbuf->row);
212 RF_PANIC();
213 }
214 pssPtr->flags |= RF_PSS_BUFFERWAIT;
215 cb = rf_AllocCallbackDesc(); /* append to buf wait list in
216 * recon ctrl structure */
217 cb->row = rbuf->row;
218 cb->col = rbuf->col;
219 cb->callbackArg.v = rbuf->parityStripeID;
220 cb->callbackArg2.v = rbuf->which_ru;
221 cb->next = NULL;
222 if (!reconCtrlPtr->bufferWaitList)
223 reconCtrlPtr->bufferWaitList = cb;
224 else { /* might want to maintain head/tail pointers
225 * here rather than search for end of list */
226 for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
227 p->next = cb;
228 }
229 retcode = 1;
230 goto out;
231 }
232 Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
233 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
234 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
235 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
236 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
237 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
238
239 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
240
241 /* initialize the buffer */
242 if (t != rbuf) {
243 t->row = rbuf->row;
244 t->col = reconCtrlPtr->fcol;
245 t->parityStripeID = rbuf->parityStripeID;
246 t->which_ru = rbuf->which_ru;
247 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
248 t->spRow = rbuf->spRow;
249 t->spCol = rbuf->spCol;
250 t->spOffset = rbuf->spOffset;
251
252 ta = t->buffer;
253 t->buffer = rbuf->buffer;
254 rbuf->buffer = ta; /* swap buffers */
255 }
256 /* the first installation always gets installed as the destination
257 * buffer. subsequent installations get stacked up to allow for
258 * multi-way XOR */
259 if (!pssPtr->rbuf) {
260 pssPtr->rbuf = t;
261 t->count = 1;
262 } else
263 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */
264
265 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if
266 * G=2 */
267
268 out:
269 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
270 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
271 return (retcode);
272 }
273
274 int
275 rf_MultiWayReconXor(raidPtr, pssPtr)
276 RF_Raid_t *raidPtr;
277 RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this
278 * parity stripe */
279 {
280 int i, numBufs = pssPtr->xorBufCount;
281 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
282 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
283 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
284
285 RF_ASSERT(pssPtr->rbuf != NULL);
286 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
287 #ifdef _KERNEL
288 #ifndef __NetBSD__
289 thread_block(); /* yield the processor before doing a big XOR */
290 #endif
291 #endif /* _KERNEL */
292 /*
293 * XXX
294 *
295 * What if more than 9 bufs?
296 */
297 nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
298
299 /* release all the reconstruction buffers except the last one, which
300 * belongs to the disk whose submission caused this XOR to take place */
301 for (i = 0; i < numBufs - 1; i++) {
302 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
303 rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
304 else
305 if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
306 rf_FreeReconBuffer(rbufs[i]);
307 else
308 RF_ASSERT(0);
309 }
310 targetRbuf->count += pssPtr->xorBufCount;
311 pssPtr->xorBufCount = 0;
312 return (0);
313 }
314 /* removes one full buffer from one of the full-buffer lists and returns it.
315 *
316 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
317 */
318 RF_ReconBuffer_t *
319 rf_GetFullReconBuffer(reconCtrlPtr)
320 RF_ReconCtrl_t *reconCtrlPtr;
321 {
322 RF_ReconBuffer_t *p;
323
324 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
325
326 if ((p = reconCtrlPtr->priorityList) != NULL) {
327 reconCtrlPtr->priorityList = p->next;
328 p->next = NULL;
329 goto out;
330 }
331 if ((p = reconCtrlPtr->fullBufferList) != NULL) {
332 reconCtrlPtr->fullBufferList = p->next;
333 p->next = NULL;
334 goto out;
335 }
336 out:
337 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
338 return (p);
339 }
340
341
342 /* if the reconstruction buffer is full, move it to the full list, which is maintained
343 * sorted by failed disk sector offset
344 *
345 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
346 */
347 int
348 rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
349 RF_Raid_t *raidPtr;
350 RF_ReconCtrl_t *reconCtrl;
351 RF_ReconParityStripeStatus_t *pssPtr;
352 int numDataCol;
353 {
354 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
355
356 if (rbuf->count == numDataCol) {
357 raidPtr->numFullReconBuffers++;
358 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
359 (long) rbuf->parityStripeID, rbuf->which_ru);
360 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
361 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
362 (long) rbuf->parityStripeID, rbuf->which_ru);
363 rbuf->next = reconCtrl->fullBufferList;
364 reconCtrl->fullBufferList = rbuf;
365 } else {
366 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
367 rbuf->next = p;
368 pt->next = rbuf;
369 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
370 (long) rbuf->parityStripeID, rbuf->which_ru);
371 }
372 #if 0
373 pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like
374 * to be able to find
375 * this rbuf while it's
376 * awaiting write */
377 #else
378 rbuf->pssPtr = pssPtr;
379 #endif
380 pssPtr->rbuf = NULL;
381 rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
382 }
383 return (0);
384 }
385
386
387 /* release a floating recon buffer for someone else to use.
388 * assumes the rb_mutex is LOCKED at entry
389 */
390 void
391 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
392 RF_Raid_t *raidPtr;
393 RF_RowCol_t row;
394 RF_ReconBuffer_t *rbuf;
395 {
396 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
397 RF_CallbackDesc_t *cb;
398
399 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
400 (long) rbuf->parityStripeID, rbuf->which_ru);
401
402 /* if anyone is waiting on buffers, wake one of them up. They will
403 * subsequently wake up anyone else waiting on their RU */
404 if (rcPtr->bufferWaitList) {
405 rbuf->next = rcPtr->committedRbufs;
406 rcPtr->committedRbufs = rbuf;
407 cb = rcPtr->bufferWaitList;
408 rcPtr->bufferWaitList = cb->next;
409 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've
410 * committed a buffer */
411 rf_FreeCallbackDesc(cb);
412 raidPtr->procsInBufWait--;
413 } else {
414 rbuf->next = rcPtr->floatingRbufs;
415 rcPtr->floatingRbufs = rbuf;
416 }
417 }
418 /* release any disk that is waiting on a buffer for the indicated RU.
419 * assumes the rb_mutex is LOCKED at entry
420 */
421 void
422 rf_ReleaseBufferWaiters(raidPtr, pssPtr)
423 RF_Raid_t *raidPtr;
424 RF_ReconParityStripeStatus_t *pssPtr;
425 {
426 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
427
428 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
429 (long) pssPtr->parityStripeID, pssPtr->which_ru);
430 pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
431 while (cb) {
432 cb1 = cb->next;
433 cb->next = NULL;
434 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't
435 * committed a buffer */
436 rf_FreeCallbackDesc(cb);
437 cb = cb1;
438 }
439 pssPtr->bufWaitList = NULL;
440 }
441 /* when reconstruction is forced on an RU, there may be some disks waiting to
442 * acquire a buffer for that RU. Since we allocate a new buffer as part of
443 * the forced-reconstruction process, we no longer have to wait for any
444 * buffers, so we wakeup any waiter that we find in the bufferWaitList
445 *
446 * assumes the rb_mutex is LOCKED at entry
447 */
448 void
449 rf_ReleaseBufferWaiter(rcPtr, rbuf)
450 RF_ReconCtrl_t *rcPtr;
451 RF_ReconBuffer_t *rbuf;
452 {
453 RF_CallbackDesc_t *cb, *cbt;
454
455 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
456 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
457 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
458 if (cbt)
459 cbt->next = cb->next;
460 else
461 rcPtr->bufferWaitList = cb->next;
462 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no
463 * committed buffer */
464 rf_FreeCallbackDesc(cb);
465 return;
466 }
467 }
468 }
469