rf_reconbuffer.c revision 1.5.2.4 1 /* $NetBSD: rf_reconbuffer.c,v 1.5.2.4 2002/09/17 21:20:56 nathanw Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /***************************************************
30 *
31 * rf_reconbuffer.c -- reconstruction buffer manager
32 *
33 ***************************************************/
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.5.2.4 2002/09/17 21:20:56 nathanw Exp $");
37
38 #include "rf_raid.h"
39 #include "rf_reconbuffer.h"
40 #include "rf_acctrace.h"
41 #include "rf_etimer.h"
42 #include "rf_general.h"
43 #include "rf_revent.h"
44 #include "rf_reconutil.h"
45 #include "rf_nwayxor.h"
46
#ifdef DEBUG

/*
 * Conditional debug printfs, enabled at runtime by rf_reconbufferDebug.
 *
 * Wrapped in do { } while (0) so each macro behaves as a single
 * statement: the old bare-if form silently bound a following "else"
 * to the hidden "if (rf_reconbufferDebug)" (dangling-else bug).
 */
#define Dprintf1(s,a) do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

/*
 * No-op variants.  do { } while (0) rather than {}: "{};" after an
 * if-body would otherwise terminate the if and make a following
 * "else" a syntax error.
 */
#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif
64
65 /*****************************************************************************
66 *
67 * Submit a reconstruction buffer to the manager for XOR. We can only
68 * submit a buffer if (1) we can xor into an existing buffer, which
69 * means we don't have to acquire a new one, (2) we can acquire a
70 * floating recon buffer, or (3) the caller has indicated that we are
71 * allowed to keep the submitted buffer.
72 *
73 * Returns non-zero if and only if we were not able to submit.
74 * In this case, we append the current disk ID to the wait list on the
75 * indicated RU, so that it will be re-enabled when we acquire a buffer
76 * for this RU.
77 *
78 ****************************************************************************/
79
/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.  Entry [0] is an unused NULL
 * sentinel (there is no 0-way xor), and the table tops out at 9
 * source buffers — callers must never index it with a larger
 * count (see the XXX in rf_MultiWayReconXor).
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,			/* [0] unused sentinel */
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};
96
97 int
98 rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
99 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */
100 int keep_it; /* whether we can keep this buffer or we have
101 * to return it */
102 int use_committed; /* whether to use a committed or an available
103 * recon buffer */
104 {
105 RF_LayoutSW_t *lp;
106 int rc;
107
108 lp = rbuf->raidPtr->Layout.map;
109 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
110 return (rc);
111 }
112
/*
 * Generic (basic) recon-buffer submission routine — see the block
 * comment above for the overall contract.  Returns 0 on successful
 * submission, 1 if no buffer could be acquired and the submitting
 * disk was appended to the buffer wait list.  Acquires both the PSS
 * mutex for this parity stripe and the recon ctrl rb_mutex; both are
 * released on every exit path.
 */
int
rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	caddr_t ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	/* RF_PSS_NONE: look up only, do not create — the status record
	 * must already exist or this rbuf could not have been issued */
	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR.  If
	 * so, there's no need to acquire a floating rbuf.  Before we can do
	 * any XORing, we must have acquired a destination buffer.  If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe.  Otherwise, we have to go acquire a
	 * floating rbuf. */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
		/* time the XOR so it can be charged to accumXorTimeUs */
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			/* buffer goes back to the submitting disk: log its
			 * per-disk trace record before returning it */
			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			/* caller gave us the buffer outright; free it now
			 * that it has been XORed in */
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
		/* if every surviving disk is blocked on a buffer and no full
		 * buffer exists to drain, nothing can ever make progress */
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected.  Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {		/* might want to maintain head/tail pointers
				 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer */
	if (t != rbuf) {
		/* the acquired buffer "t" stays in the recon machinery: copy
		 * the stripe identity into it and swap DATA buffers so the
		 * submitted data ends up in t and the caller gets t's empty
		 * buffer back inside rbuf */
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer.  subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
										 * G=2 */

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}
283
284 int
285 rf_MultiWayReconXor(raidPtr, pssPtr)
286 RF_Raid_t *raidPtr;
287 RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this
288 * parity stripe */
289 {
290 int i, numBufs = pssPtr->xorBufCount;
291 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
292 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
293 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
294
295 RF_ASSERT(pssPtr->rbuf != NULL);
296 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
297 #ifdef _KERNEL
298 #ifndef __NetBSD__
299 thread_block(); /* yield the processor before doing a big XOR */
300 #endif
301 #endif /* _KERNEL */
302 /*
303 * XXX
304 *
305 * What if more than 9 bufs?
306 */
307 nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
308
309 /* release all the reconstruction buffers except the last one, which
310 * belongs to the disk whose submission caused this XOR to take place */
311 for (i = 0; i < numBufs - 1; i++) {
312 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
313 rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
314 else
315 if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
316 rf_FreeReconBuffer(rbufs[i]);
317 else
318 RF_ASSERT(0);
319 }
320 targetRbuf->count += pssPtr->xorBufCount;
321 pssPtr->xorBufCount = 0;
322 return (0);
323 }
324 /* removes one full buffer from one of the full-buffer lists and returns it.
325 *
326 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
327 */
328 RF_ReconBuffer_t *
329 rf_GetFullReconBuffer(reconCtrlPtr)
330 RF_ReconCtrl_t *reconCtrlPtr;
331 {
332 RF_ReconBuffer_t *p;
333
334 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
335
336 if ((p = reconCtrlPtr->priorityList) != NULL) {
337 reconCtrlPtr->priorityList = p->next;
338 p->next = NULL;
339 goto out;
340 }
341 if ((p = reconCtrlPtr->fullBufferList) != NULL) {
342 reconCtrlPtr->fullBufferList = p->next;
343 p->next = NULL;
344 goto out;
345 }
346 out:
347 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
348 return (p);
349 }
350
351
/* if the reconstruction buffer is full, move it to the full list,
 * which is maintained sorted by failed disk sector offset
 *
 * A buffer is full when its count reaches numDataCol, i.e. every
 * surviving data column has been XORed in.  Always returns 0.
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */
int
rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
	RF_Raid_t *raidPtr;
	RF_ReconCtrl_t *reconCtrl;
	RF_ReconParityStripeStatus_t *pssPtr;
	int     numDataCol;
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		/* sorted insert by failedDiskSectorOffset: head case first */
		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			/* walk to the first node with a larger offset; pt
			 * trails p so we can link in between */
			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
#if 0
		pssPtr->writeRbuf = pssPtr->rbuf;	/* DEBUG ONLY: we like
							 * to be able to find
							 * this rbuf while it's
							 * awaiting write */
#else
		rbuf->pssPtr = pssPtr;
#endif
		pssPtr->rbuf = NULL;	/* stripe no longer owns a destination buffer */
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
	}
	return (0);
}
394
395
396 /* release a floating recon buffer for someone else to use.
397 * assumes the rb_mutex is LOCKED at entry
398 */
399 void
400 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
401 RF_Raid_t *raidPtr;
402 RF_RowCol_t row;
403 RF_ReconBuffer_t *rbuf;
404 {
405 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
406 RF_CallbackDesc_t *cb;
407
408 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
409 (long) rbuf->parityStripeID, rbuf->which_ru);
410
411 /* if anyone is waiting on buffers, wake one of them up. They will
412 * subsequently wake up anyone else waiting on their RU */
413 if (rcPtr->bufferWaitList) {
414 rbuf->next = rcPtr->committedRbufs;
415 rcPtr->committedRbufs = rbuf;
416 cb = rcPtr->bufferWaitList;
417 rcPtr->bufferWaitList = cb->next;
418 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've
419 * committed a buffer */
420 rf_FreeCallbackDesc(cb);
421 raidPtr->procsInBufWait--;
422 } else {
423 rbuf->next = rcPtr->floatingRbufs;
424 rcPtr->floatingRbufs = rbuf;
425 }
426 }
427 /* release any disk that is waiting on a buffer for the indicated RU.
428 * assumes the rb_mutex is LOCKED at entry
429 */
430 void
431 rf_ReleaseBufferWaiters(raidPtr, pssPtr)
432 RF_Raid_t *raidPtr;
433 RF_ReconParityStripeStatus_t *pssPtr;
434 {
435 RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
436
437 Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
438 (long) pssPtr->parityStripeID, pssPtr->which_ru);
439 pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
440 while (cb) {
441 cb1 = cb->next;
442 cb->next = NULL;
443 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't
444 * committed a buffer */
445 rf_FreeCallbackDesc(cb);
446 cb = cb1;
447 }
448 pssPtr->bufWaitList = NULL;
449 }
450 /* when reconstruction is forced on an RU, there may be some disks waiting to
451 * acquire a buffer for that RU. Since we allocate a new buffer as part of
452 * the forced-reconstruction process, we no longer have to wait for any
453 * buffers, so we wakeup any waiter that we find in the bufferWaitList
454 *
455 * assumes the rb_mutex is LOCKED at entry
456 */
457 void
458 rf_ReleaseBufferWaiter(rcPtr, rbuf)
459 RF_ReconCtrl_t *rcPtr;
460 RF_ReconBuffer_t *rbuf;
461 {
462 RF_CallbackDesc_t *cb, *cbt;
463
464 for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
465 if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
466 Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
467 if (cbt)
468 cbt->next = cb->next;
469 else
470 rcPtr->bufferWaitList = cb->next;
471 rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no
472 * committed buffer */
473 rf_FreeCallbackDesc(cb);
474 return;
475 }
476 }
477 }
478