Home | History | Annotate | Line # | Download | only in raidframe
rf_reconbuffer.c revision 1.5
      1 /*	$NetBSD: rf_reconbuffer.c,v 1.5 2001/01/27 20:10:49 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /***************************************************
     30  *
     31  * rf_reconbuffer.c -- reconstruction buffer manager
     32  *
     33  ***************************************************/
     34 
     35 #include "rf_raid.h"
     36 #include "rf_reconbuffer.h"
     37 #include "rf_acctrace.h"
     38 #include "rf_etimer.h"
     39 #include "rf_general.h"
     40 #include "rf_debugprint.h"
     41 #include "rf_revent.h"
     42 #include "rf_reconutil.h"
     43 #include "rf_nwayxor.h"
     44 
/*
 * Debug tracing helpers: DprintfN takes a printf format string followed by
 * N arguments and prints only when rf_reconbufferDebug is non-zero.
 *
 * Each expansion is wrapped in do { } while (0) so that it acts as a single
 * statement: used as the unbraced body of an "if", it can no longer capture
 * the "else" that was meant for the enclosing "if".
 */
#define Dprintf1(s,a) do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)
     50 
     51 /*****************************************************************************
     52  *
     53  * Submit a reconstruction buffer to the manager for XOR.  We can only
     54  * submit a buffer if (1) we can xor into an existing buffer, which
     55  * means we don't have to acquire a new one, (2) we can acquire a
     56  * floating recon buffer, or (3) the caller has indicated that we are
     57  * allowed to keep the submitted buffer.
     58  *
     59  * Returns non-zero if and only if we were not able to submit.
     60  * In this case, we append the current disk ID to the wait list on the
     61  * indicated RU, so that it will be re-enabled when we acquire a buffer
     62  * for this RU.
     63  *
     64  ****************************************************************************/
     65 
     66 /*
     67  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
     68  * bufs into the accumulating sum.
     69  */
     70 static RF_VoidFuncPtr nWayXorFuncs[] = {
     71 	NULL,
     72 	(RF_VoidFuncPtr) rf_nWayXor1,
     73 	(RF_VoidFuncPtr) rf_nWayXor2,
     74 	(RF_VoidFuncPtr) rf_nWayXor3,
     75 	(RF_VoidFuncPtr) rf_nWayXor4,
     76 	(RF_VoidFuncPtr) rf_nWayXor5,
     77 	(RF_VoidFuncPtr) rf_nWayXor6,
     78 	(RF_VoidFuncPtr) rf_nWayXor7,
     79 	(RF_VoidFuncPtr) rf_nWayXor8,
     80 	(RF_VoidFuncPtr) rf_nWayXor9
     81 };
     82 
     83 int
     84 rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
     85 	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
     86 	int     keep_it;	/* whether we can keep this buffer or we have
     87 				 * to return it */
     88 	int     use_committed;	/* whether to use a committed or an available
     89 				 * recon buffer */
     90 {
     91 	RF_LayoutSW_t *lp;
     92 	int     rc;
     93 
     94 	lp = rbuf->raidPtr->Layout.map;
     95 	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
     96 	return (rc);
     97 }
     98 
     99 int
    100 rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
    101 	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
    102 	int     keep_it;	/* whether we can keep this buffer or we have
    103 				 * to return it */
    104 	int     use_committed;	/* whether to use a committed or an available
    105 				 * recon buffer */
    106 {
    107 	RF_Raid_t *raidPtr = rbuf->raidPtr;
    108 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    109 	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
    110 	RF_ReconParityStripeStatus_t *pssPtr;
    111 	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
    112 							 * pointers */
    113 	caddr_t ta;		/* temporary data buffer pointer */
    114 	RF_CallbackDesc_t *cb, *p;
    115 	int     retcode = 0, created = 0;
    116 
    117 	RF_Etimer_t timer;
    118 
    119 	/* makes no sense to have a submission from the failed disk */
    120 	RF_ASSERT(rbuf);
    121 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    122 
    123 	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
    124 	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
    125 
    126 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
    127 
    128 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    129 
    130 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    131 	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
    132 				 * an rbuf for it */
    133 
    134 	/* check to see if enough buffers have accumulated to do an XOR.  If
    135 	 * so, there's no need to acquire a floating rbuf.  Before we can do
    136 	 * any XORing, we must have acquired a destination buffer.  If we
    137 	 * have, then we can go ahead and do the XOR if (1) including this
    138 	 * buffer, enough bufs have accumulated, or (2) this is the last
    139 	 * submission for this stripe. Otherwise, we have to go acquire a
    140 	 * floating rbuf. */
    141 
    142 	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    143 	if ((targetRbuf != NULL) &&
    144 	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
    145 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
    146 		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
    147 		RF_ETIMER_START(timer);
    148 		rf_MultiWayReconXor(raidPtr, pssPtr);
    149 		RF_ETIMER_STOP(timer);
    150 		RF_ETIMER_EVAL(timer);
    151 		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
    152 		if (!keep_it) {
    153 			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
    154 			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    155 			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    156 			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    157 			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    158 			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    159 
    160 			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    161 		}
    162 		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
    163 
    164 		/* if use_committed is on, we _must_ consume a buffer off the
    165 		 * committed list. */
    166 		if (use_committed) {
    167 			t = reconCtrlPtr->committedRbufs;
    168 			RF_ASSERT(t);
    169 			reconCtrlPtr->committedRbufs = t->next;
    170 			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
    171 		}
    172 		if (keep_it) {
    173 			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
    174 			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
    175 			rf_FreeReconBuffer(rbuf);
    176 			return (retcode);
    177 		}
    178 		goto out;
    179 	}
    180 	/* set the value of "t", which we'll use as the rbuf from here on */
    181 	if (keep_it) {
    182 		t = rbuf;
    183 	} else {
    184 		if (use_committed) {	/* if a buffer has been committed to
    185 					 * us, use it */
    186 			t = reconCtrlPtr->committedRbufs;
    187 			RF_ASSERT(t);
    188 			reconCtrlPtr->committedRbufs = t->next;
    189 			t->next = NULL;
    190 		} else
    191 			if (reconCtrlPtr->floatingRbufs) {
    192 				t = reconCtrlPtr->floatingRbufs;
    193 				reconCtrlPtr->floatingRbufs = t->next;
    194 				t->next = NULL;
    195 			}
    196 	}
    197 
    198 	/* If we weren't able to acquire a buffer, append to the end of the
    199 	 * buf list in the recon ctrl struct. */
    200 	if (!t) {
    201 		RF_ASSERT(!keep_it && !use_committed);
    202 		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);
    203 
    204 		raidPtr->procsInBufWait++;
    205 		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
    206 			printf("Buffer wait deadlock detected.  Exiting.\n");
    207 			rf_PrintPSStatusTable(raidPtr, rbuf->row);
    208 			RF_PANIC();
    209 		}
    210 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
    211 		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
    212 						 * recon ctrl structure */
    213 		cb->row = rbuf->row;
    214 		cb->col = rbuf->col;
    215 		cb->callbackArg.v = rbuf->parityStripeID;
    216 		cb->callbackArg2.v = rbuf->which_ru;
    217 		cb->next = NULL;
    218 		if (!reconCtrlPtr->bufferWaitList)
    219 			reconCtrlPtr->bufferWaitList = cb;
    220 		else {		/* might want to maintain head/tail pointers
    221 				 * here rather than search for end of list */
    222 			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
    223 			p->next = cb;
    224 		}
    225 		retcode = 1;
    226 		goto out;
    227 	}
    228 	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
    229 	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    230 	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    231 	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    232 	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    233 	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    234 
    235 	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    236 
    237 	/* initialize the buffer */
    238 	if (t != rbuf) {
    239 		t->row = rbuf->row;
    240 		t->col = reconCtrlPtr->fcol;
    241 		t->parityStripeID = rbuf->parityStripeID;
    242 		t->which_ru = rbuf->which_ru;
    243 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    244 		t->spRow = rbuf->spRow;
    245 		t->spCol = rbuf->spCol;
    246 		t->spOffset = rbuf->spOffset;
    247 
    248 		ta = t->buffer;
    249 		t->buffer = rbuf->buffer;
    250 		rbuf->buffer = ta;	/* swap buffers */
    251 	}
    252 	/* the first installation always gets installed as the destination
    253 	 * buffer. subsequent installations get stacked up to allow for
    254 	 * multi-way XOR */
    255 	if (!pssPtr->rbuf) {
    256 		pssPtr->rbuf = t;
    257 		t->count = 1;
    258 	} else
    259 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */
    260 
    261 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
    262 											 * G=2 */
    263 
    264 out:
    265 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
    266 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
    267 	return (retcode);
    268 }
    269 
    270 int
    271 rf_MultiWayReconXor(raidPtr, pssPtr)
    272 	RF_Raid_t *raidPtr;
    273 	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
    274 						 * parity stripe */
    275 {
    276 	int     i, numBufs = pssPtr->xorBufCount;
    277 	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
    278 	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
    279 	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    280 
    281 	RF_ASSERT(pssPtr->rbuf != NULL);
    282 	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
    283 #ifdef _KERNEL
    284 #ifndef __NetBSD__
    285 	thread_block();		/* yield the processor before doing a big XOR */
    286 #endif
    287 #endif				/* _KERNEL */
    288 	/*
    289          * XXX
    290          *
    291          * What if more than 9 bufs?
    292          */
    293 	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
    294 
    295 	/* release all the reconstruction buffers except the last one, which
    296 	 * belongs to the disk whose submission caused this XOR to take place */
    297 	for (i = 0; i < numBufs - 1; i++) {
    298 		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
    299 			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
    300 		else
    301 			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
    302 				rf_FreeReconBuffer(rbufs[i]);
    303 			else
    304 				RF_ASSERT(0);
    305 	}
    306 	targetRbuf->count += pssPtr->xorBufCount;
    307 	pssPtr->xorBufCount = 0;
    308 	return (0);
    309 }
    310 /* removes one full buffer from one of the full-buffer lists and returns it.
    311  *
    312  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
    313  */
    314 RF_ReconBuffer_t *
    315 rf_GetFullReconBuffer(reconCtrlPtr)
    316 	RF_ReconCtrl_t *reconCtrlPtr;
    317 {
    318 	RF_ReconBuffer_t *p;
    319 
    320 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    321 
    322 	if ((p = reconCtrlPtr->priorityList) != NULL) {
    323 		reconCtrlPtr->priorityList = p->next;
    324 		p->next = NULL;
    325 		goto out;
    326 	}
    327 	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
    328 		reconCtrlPtr->fullBufferList = p->next;
    329 		p->next = NULL;
    330 		goto out;
    331 	}
    332 out:
    333 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
    334 	return (p);
    335 }
    336 
    337 
    338 /* if the reconstruction buffer is full, move it to the full list,
    339  * which is maintained sorted by failed disk sector offset
    340  *
    341  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
    342 int
    343 rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
    344 	RF_Raid_t *raidPtr;
    345 	RF_ReconCtrl_t *reconCtrl;
    346 	RF_ReconParityStripeStatus_t *pssPtr;
    347 	int     numDataCol;
    348 {
    349 	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    350 
    351 	if (rbuf->count == numDataCol) {
    352 		raidPtr->numFullReconBuffers++;
    353 		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
    354 		    (long) rbuf->parityStripeID, rbuf->which_ru);
    355 		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
    356 			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
    357 			    (long) rbuf->parityStripeID, rbuf->which_ru);
    358 			rbuf->next = reconCtrl->fullBufferList;
    359 			reconCtrl->fullBufferList = rbuf;
    360 		} else {
    361 			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
    362 			rbuf->next = p;
    363 			pt->next = rbuf;
    364 			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
    365 			    (long) rbuf->parityStripeID, rbuf->which_ru);
    366 		}
    367 #if 0
    368 		pssPtr->writeRbuf = pssPtr->rbuf;	/* DEBUG ONLY:  we like
    369 							 * to be able to find
    370 							 * this rbuf while it's
    371 							 * awaiting write */
    372 #else
    373 		rbuf->pssPtr = pssPtr;
    374 #endif
    375 		pssPtr->rbuf = NULL;
    376 		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
    377 	}
    378 	return (0);
    379 }
    380 
    381 
    382 /* release a floating recon buffer for someone else to use.
    383  * assumes the rb_mutex is LOCKED at entry
    384  */
    385 void
    386 rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
    387 	RF_Raid_t *raidPtr;
    388 	RF_RowCol_t row;
    389 	RF_ReconBuffer_t *rbuf;
    390 {
    391 	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
    392 	RF_CallbackDesc_t *cb;
    393 
    394 	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
    395 	    (long) rbuf->parityStripeID, rbuf->which_ru);
    396 
    397 	/* if anyone is waiting on buffers, wake one of them up.  They will
    398 	 * subsequently wake up anyone else waiting on their RU */
    399 	if (rcPtr->bufferWaitList) {
    400 		rbuf->next = rcPtr->committedRbufs;
    401 		rcPtr->committedRbufs = rbuf;
    402 		cb = rcPtr->bufferWaitList;
    403 		rcPtr->bufferWaitList = cb->next;
    404 		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
    405 												 * committed a buffer */
    406 		rf_FreeCallbackDesc(cb);
    407 		raidPtr->procsInBufWait--;
    408 	} else {
    409 		rbuf->next = rcPtr->floatingRbufs;
    410 		rcPtr->floatingRbufs = rbuf;
    411 	}
    412 }
    413 /* release any disk that is waiting on a buffer for the indicated RU.
    414  * assumes the rb_mutex is LOCKED at entry
    415  */
    416 void
    417 rf_ReleaseBufferWaiters(raidPtr, pssPtr)
    418 	RF_Raid_t *raidPtr;
    419 	RF_ReconParityStripeStatus_t *pssPtr;
    420 {
    421 	RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
    422 
    423 	Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
    424 	    (long) pssPtr->parityStripeID, pssPtr->which_ru);
    425 	pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
    426 	while (cb) {
    427 		cb1 = cb->next;
    428 		cb->next = NULL;
    429 		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR);	/* arg==0 => we haven't
    430 												 * committed a buffer */
    431 		rf_FreeCallbackDesc(cb);
    432 		cb = cb1;
    433 	}
    434 	pssPtr->bufWaitList = NULL;
    435 }
    436 /* when reconstruction is forced on an RU, there may be some disks waiting to
    437  * acquire a buffer for that RU.  Since we allocate a new buffer as part of
    438  * the forced-reconstruction process, we no longer have to wait for any
    439  * buffers, so we wakeup any waiter that we find in the bufferWaitList
    440  *
    441  * assumes the rb_mutex is LOCKED at entry
    442  */
    443 void
    444 rf_ReleaseBufferWaiter(rcPtr, rbuf)
    445 	RF_ReconCtrl_t *rcPtr;
    446 	RF_ReconBuffer_t *rbuf;
    447 {
    448 	RF_CallbackDesc_t *cb, *cbt;
    449 
    450 	for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) {
    451 		if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) {
    452 			Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
    453 			if (cbt)
    454 				cbt->next = cb->next;
    455 			else
    456 				rcPtr->bufferWaitList = cb->next;
    457 			rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);	/* arg==0 => no
    458 																 * committed buffer */
    459 			rf_FreeCallbackDesc(cb);
    460 			return;
    461 		}
    462 	}
    463 }
    464