rf_reconbuffer.c revision 1.2 1 /* $NetBSD: rf_reconbuffer.c,v 1.2 1999/01/26 02:34:01 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /***************************************************
30 *
31 * rf_reconbuffer.c -- reconstruction buffer manager
32 *
33 ***************************************************/
34
35 #include "rf_raid.h"
36 #include "rf_reconbuffer.h"
37 #include "rf_acctrace.h"
38 #include "rf_etimer.h"
39 #include "rf_general.h"
40 #include "rf_debugprint.h"
41 #include "rf_revent.h"
42 #include "rf_reconutil.h"
43 #include "rf_nwayxor.h"
44
/*
 * Debug tracing helpers: forward to printf only when rf_reconbufferDebug is
 * non-zero.  The digit in the name is the number of arguments following the
 * format string.
 *
 * Each macro expands to a single do { } while (0) statement so that it can be
 * used as the body of an unbraced if/else without capturing the else.
 */
#define Dprintf1(s,a)         do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b)       do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c)     do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d)   do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)
50
51 /*****************************************************************************************
52 *
53 * Submit a reconstruction buffer to the manager for XOR.
54 * We can only submit a buffer if (1) we can xor into an existing buffer, which means
55 * we don't have to acquire a new one, (2) we can acquire a floating
56 * recon buffer, or (3) the caller has indicated that we are allowed to keep the
57 * submitted buffer.
58 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, the submitting disk is appended to the buffer wait list in the
 * reconstruction control structure and the RU's status is flagged
 * RF_PSS_BUFFERWAIT, so that the disk is re-enabled when a floating buffer is
 * released.
62 *
63 ****************************************************************************************/
64
/*
 * Just to make the code below more readable.
 *
 * Allocates a callback descriptor into _cb_, records (_row_, _col_) in it and
 * links it at the head of (_pssPtr_)->bufWaitList.
 *
 * Wrapped in do { } while (0) so that the whole sequence behaves as one
 * statement, e.g. as the body of an unbraced if.  Invoke it with a trailing
 * semicolon.
 */
#define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_)   \
  do {                                                 \
    (_cb_) = rf_AllocCallbackDesc();                   \
    (_cb_)->row = (_row_);                             \
    (_cb_)->col = (_col_);                             \
    (_cb_)->next = (_pssPtr_)->bufWaitList;            \
    (_pssPtr_)->bufWaitList = (_cb_);                  \
  } while (0)
69
/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 *
 * The table is indexed directly by the number of source buffers, so slot 0
 * is NULL (there is no zero-way XOR) and the widest XOR available in a single
 * call is 9 buffers.
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
  NULL,                           /* 0 buffers: no such operation */
  (RF_VoidFuncPtr)rf_nWayXor1,
  (RF_VoidFuncPtr)rf_nWayXor2,
  (RF_VoidFuncPtr)rf_nWayXor3,
  (RF_VoidFuncPtr)rf_nWayXor4,
  (RF_VoidFuncPtr)rf_nWayXor5,
  (RF_VoidFuncPtr)rf_nWayXor6,
  (RF_VoidFuncPtr)rf_nWayXor7,
  (RF_VoidFuncPtr)rf_nWayXor8,
  (RF_VoidFuncPtr)rf_nWayXor9
};
86
87 int rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
88 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
89 int keep_it; /* whether we can keep this buffer or we have to return it */
90 int use_committed; /* whether to use a committed or an available recon buffer */
91 {
92 RF_LayoutSW_t *lp;
93 int rc;
94
95 lp = rbuf->raidPtr->Layout.map;
96 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
97 return(rc);
98 }
99
100 int rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
101 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
102 int keep_it; /* whether we can keep this buffer or we have to return it */
103 int use_committed; /* whether to use a committed or an available recon buffer */
104 {
105 RF_Raid_t *raidPtr = rbuf->raidPtr;
106 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
107 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
108 RF_ReconParityStripeStatus_t *pssPtr;
109 RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf pointers */
110 caddr_t ta; /* temporary data buffer pointer */
111 RF_CallbackDesc_t *cb, *p;
112 int retcode = 0, created = 0;
113
114 RF_Etimer_t timer;
115
116 /* makes no sense to have a submission from the failed disk */
117 RF_ASSERT(rbuf);
118 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
119
120 Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
121 rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru, (long)rbuf->failedDiskSectorOffset);
122
123 RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID);
124
125 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
126
127 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
128 RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */
129
130 /* check to see if enough buffers have accumulated to do an XOR. If so, there's no need to
131 * acquire a floating rbuf. Before we can do any XORing, we must have acquired a destination
132 * buffer. If we have, then we can go ahead and do the XOR if (1) including this buffer, enough
133 * bufs have accumulated, or (2) this is the last submission for this stripe.
134 * Otherwise, we have to go acquire a floating rbuf.
135 */
136
137 targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
138 if ( (targetRbuf != NULL) &&
139 ((pssPtr->xorBufCount == rf_numBufsToAccumulate-1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol)) ) {
140 pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = rbuf; /* install this buffer */
141 Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n",rbuf->row, rbuf->col,pssPtr->xorBufCount);
142 RF_ETIMER_START(timer);
143 rf_MultiWayReconXor(raidPtr, pssPtr);
144 RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer);
145 raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
146 if (!keep_it) {
147 raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
148 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
149 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
150 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
151 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
152 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
153
154 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
155 }
156 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
157
158 /* if use_committed is on, we _must_ consume a buffer off the committed list. */
159 if (use_committed) {
160 t = reconCtrlPtr->committedRbufs;
161 RF_ASSERT(t);
162 reconCtrlPtr->committedRbufs = t->next;
163 rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
164 }
165 if (keep_it) {
166 RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
167 RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
168 rf_FreeReconBuffer(rbuf);
169 return(retcode);
170 }
171 goto out;
172 }
173
174 /* set the value of "t", which we'll use as the rbuf from here on */
175 if (keep_it) {
176 t = rbuf;
177 }
178 else {
179 if (use_committed) { /* if a buffer has been committed to us, use it */
180 t = reconCtrlPtr->committedRbufs;
181 RF_ASSERT(t);
182 reconCtrlPtr->committedRbufs = t->next;
183 t->next = NULL;
184 } else if (reconCtrlPtr->floatingRbufs) {
185 t = reconCtrlPtr->floatingRbufs;
186 reconCtrlPtr->floatingRbufs = t->next;
187 t->next = NULL;
188 }
189 }
190
191 /* If we weren't able to acquire a buffer,
192 * append to the end of the buf list in the recon ctrl struct.
193 */
194 if (!t) {
195 RF_ASSERT(!keep_it && !use_committed);
196 Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n",rbuf->row, rbuf->col);
197
198 raidPtr->procsInBufWait++;
199 if ( (raidPtr->procsInBufWait == raidPtr->numCol -1) && (raidPtr->numFullReconBuffers == 0)) {
200 printf("Buffer wait deadlock detected. Exiting.\n");
201 rf_PrintPSStatusTable(raidPtr, rbuf->row);
202 RF_PANIC();
203 }
204 pssPtr->flags |= RF_PSS_BUFFERWAIT;
205 cb = rf_AllocCallbackDesc(); /* append to buf wait list in recon ctrl structure */
206 cb->row = rbuf->row; cb->col = rbuf->col;
207 cb->callbackArg.v = rbuf->parityStripeID;
208 cb->callbackArg2.v = rbuf->which_ru;
209 cb->next = NULL;
210 if (!reconCtrlPtr->bufferWaitList) reconCtrlPtr->bufferWaitList = cb;
211 else { /* might want to maintain head/tail pointers here rather than search for end of list */
212 for (p = reconCtrlPtr->bufferWaitList; p->next; p=p->next);
213 p->next = cb;
214 }
215 retcode = 1;
216 goto out;
217 }
218 Dprintf2("RECON: row %d col %d acquired rbuf\n",rbuf->row, rbuf->col);
219 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
220 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
221 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
222 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
223 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
224
225 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
226
227 /* initialize the buffer */
228 if (t!=rbuf) {
229 t->row = rbuf->row; t->col = reconCtrlPtr->fcol;
230 t->parityStripeID = rbuf->parityStripeID;
231 t->which_ru = rbuf->which_ru;
232 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
233 t->spRow=rbuf->spRow;
234 t->spCol=rbuf->spCol;
235 t->spOffset=rbuf->spOffset;
236
237 ta = t->buffer; t->buffer = rbuf->buffer; rbuf->buffer = ta; /* swap buffers */
238 }
239
240 /* the first installation always gets installed as the destination buffer.
241 * subsequent installations get stacked up to allow for multi-way XOR
242 */
243 if (!pssPtr->rbuf) {pssPtr->rbuf = t; t->count = 1;}
244 else pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = t; /* install this buffer */
245
246 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if G=2 */
247
248 out:
249 RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
250 RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
251 return(retcode);
252 }
253
254 int rf_MultiWayReconXor(raidPtr, pssPtr)
255 RF_Raid_t *raidPtr;
256 RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this parity stripe */
257 {
258 int i, numBufs = pssPtr->xorBufCount;
259 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
260 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
261 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
262
263 RF_ASSERT(pssPtr->rbuf != NULL);
264 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
265 #ifdef _KERNEL
266 #ifndef __NetBSD__
267 thread_block(); /* yield the processor before doing a big XOR */
268 #endif
269 #endif /* _KERNEL */
270 /*
271 * XXX
272 *
273 * What if more than 9 bufs?
274 */
275 nWayXorFuncs[numBufs](pssPtr->rbufsForXor, targetRbuf, numBytes/sizeof(long));
276
277 /* release all the reconstruction buffers except the last one, which belongs to the
278 * the disk who's submission caused this XOR to take place
279 */
280 for (i=0; i < numBufs-1; i++) {
281 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
282 else if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbufs[i]);
283 else RF_ASSERT(0);
284 }
285 targetRbuf->count += pssPtr->xorBufCount;
286 pssPtr->xorBufCount = 0;
287 return(0);
288 }
289
290 /* removes one full buffer from one of the full-buffer lists and returns it.
291 *
292 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
293 */
294 RF_ReconBuffer_t *rf_GetFullReconBuffer(reconCtrlPtr)
295 RF_ReconCtrl_t *reconCtrlPtr;
296 {
297 RF_ReconBuffer_t *p;
298
299 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
300
301 if ( (p=reconCtrlPtr->priorityList) != NULL) {
302 reconCtrlPtr->priorityList = p->next;
303 p->next = NULL;
304 goto out;
305 }
306 if ( (p=reconCtrlPtr->fullBufferList) != NULL) {
307 reconCtrlPtr->fullBufferList = p->next;
308 p->next = NULL;
309 goto out;
310 }
311
312 out:
313 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
314 return(p);
315 }
316
317
318 /* if the reconstruction buffer is full, move it to the full list, which is maintained
319 * sorted by failed disk sector offset
320 *
321 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
322 */
323 int rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
324 RF_Raid_t *raidPtr;
325 RF_ReconCtrl_t *reconCtrl;
326 RF_ReconParityStripeStatus_t *pssPtr;
327 int numDataCol;
328 {
329 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
330
331 if (rbuf->count == numDataCol) {
332 raidPtr->numFullReconBuffers++;
333 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
334 (long)rbuf->parityStripeID, rbuf->which_ru);
335 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
336 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
337 (long)rbuf->parityStripeID, rbuf->which_ru);
338 rbuf->next = reconCtrl->fullBufferList;
339 reconCtrl->fullBufferList = rbuf;
340 }
341 else {
342 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt=p, p=p->next);
343 rbuf->next = p;
344 pt->next = rbuf;
345 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
346 (long)rbuf->parityStripeID, rbuf->which_ru);
347 }
348 #if 0
349 pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like to be able to find this rbuf while it's awaiting write */
350 #else
351 rbuf->pssPtr = pssPtr;
352 #endif
353 pssPtr->rbuf = NULL;
354 rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
355 }
356 return(0);
357 }
358
359
360 /* release a floating recon buffer for someone else to use.
361 * assumes the rb_mutex is LOCKED at entry
362 */
363 void rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
364 RF_Raid_t *raidPtr;
365 RF_RowCol_t row;
366 RF_ReconBuffer_t *rbuf;
367 {
368 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
369 RF_CallbackDesc_t *cb;
370
371 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
372 (long)rbuf->parityStripeID, rbuf->which_ru);
373
374 /* if anyone is waiting on buffers, wake one of them up. They will subsequently wake up anyone
375 * else waiting on their RU
376 */
377 if (rcPtr->bufferWaitList) {
378 rbuf->next = rcPtr->committedRbufs;
379 rcPtr->committedRbufs = rbuf;
380 cb = rcPtr->bufferWaitList;
381 rcPtr->bufferWaitList = cb->next;
382 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've committed a buffer */
383 rf_FreeCallbackDesc(cb);
384 raidPtr->procsInBufWait--;
385 } else {
386 rbuf->next = rcPtr->floatingRbufs;
387 rcPtr->floatingRbufs = rbuf;
388 }
389 }
390
391 /* release any disk that is waiting on a buffer for the indicated RU.
392 * assumes the rb_mutex is LOCKED at entry
393 */
394 void rf_ReleaseBufferWaiters(raidPtr, pssPtr)
395 RF_Raid_t *raidPtr;
396 RF_ReconParityStripeStatus_t *pssPtr;
397 {
398 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
399
400 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
401 (long)pssPtr->parityStripeID, pssPtr->which_ru);
402 pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
403 while (cb) {
404 cb1 = cb->next;
405 cb->next = NULL;
406 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't committed a buffer */
407 rf_FreeCallbackDesc(cb);
408 cb = cb1;
409 }
410 pssPtr->bufWaitList = NULL;
411 }
412
413 /* when reconstruction is forced on an RU, there may be some disks waiting to
414 * acquire a buffer for that RU. Since we allocate a new buffer as part of
415 * the forced-reconstruction process, we no longer have to wait for any
416 * buffers, so we wakeup any waiter that we find in the bufferWaitList
417 *
418 * assumes the rb_mutex is LOCKED at entry
419 */
420 void rf_ReleaseBufferWaiter(rcPtr, rbuf)
421 RF_ReconCtrl_t *rcPtr;
422 RF_ReconBuffer_t *rbuf;
423 {
424 RF_CallbackDesc_t *cb, *cbt;
425
426 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb=cb->next) {
427 if ( (cb->callbackArg.v == rbuf->parityStripeID) && ( cb->callbackArg2.v == rbuf->which_ru)) {
428 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
429 if (cbt) cbt->next = cb->next;
430 else rcPtr->bufferWaitList = cb->next;
431 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no committed buffer */
432 rf_FreeCallbackDesc(cb);
433 return;
434 }
435 }
436 }
437