Home | History | Annotate | Line # | Download | only in raidframe
      1 /*	$NetBSD: rf_reconbuffer.c,v 1.27 2021/07/23 00:54:45 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /***************************************************
     30  *
     31  * rf_reconbuffer.c -- reconstruction buffer manager
     32  *
     33  ***************************************************/
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.27 2021/07/23 00:54:45 oster Exp $");
     37 
     38 #include "rf_raid.h"
     39 #include "rf_reconbuffer.h"
     40 #include "rf_acctrace.h"
     41 #include "rf_etimer.h"
     42 #include "rf_general.h"
     43 #include "rf_revent.h"
     44 #include "rf_reconutil.h"
     45 #include "rf_nwayxor.h"
     46 
/*
 * Debug trace helpers.  DprintfN(fmt, a1..aN) prints through printf() when
 * DEBUG is defined and rf_reconbufferDebug is non-zero; otherwise it
 * expands to an empty statement and its arguments are not evaluated.
 *
 * Every variant is wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement.  This keeps the macros
 * safe inside unbraced if/else bodies (no dangling else in the DEBUG form,
 * no stray empty statement after a compound block in the non-DEBUG form).
 */
#ifdef DEBUG

#define Dprintf1(s,a) \
	do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) \
	do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif
     64 
     65 /*****************************************************************************
     66  *
     67  * Submit a reconstruction buffer to the manager for XOR.  We can only
     68  * submit a buffer if (1) we can xor into an existing buffer, which
     69  * means we don't have to acquire a new one, (2) we can acquire a
     70  * floating recon buffer, or (3) the caller has indicated that we are
     71  * allowed to keep the submitted buffer.
     72  *
     73  * Returns non-zero if and only if we were not able to submit.
     74  * In this case, we append the current disk ID to the wait list on the
     75  * indicated RU, so that it will be re-enabled when we acquire a buffer
     76  * for this RU.
     77  *
     78  ****************************************************************************/
     79 
     80 /*
     81  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
     82  * bufs into the accumulating sum.
     83  */
     84 static const RF_VoidFuncPtr nWayXorFuncs[] = {
     85 	NULL,
     86 	(RF_VoidFuncPtr) rf_nWayXor1,
     87 	(RF_VoidFuncPtr) rf_nWayXor2,
     88 	(RF_VoidFuncPtr) rf_nWayXor3,
     89 	(RF_VoidFuncPtr) rf_nWayXor4,
     90 	(RF_VoidFuncPtr) rf_nWayXor5,
     91 	(RF_VoidFuncPtr) rf_nWayXor6,
     92 	(RF_VoidFuncPtr) rf_nWayXor7,
     93 	(RF_VoidFuncPtr) rf_nWayXor8,
     94 	(RF_VoidFuncPtr) rf_nWayXor9
     95 };
     96 
     97 /*
     98  * rbuf          - the recon buffer to submit
     99  * keep_it       - whether we can keep this buffer or we have to return it
    100  * use_committed - whether to use a committed or an available recon buffer
    101  */
    102 int
    103 rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed)
    104 {
    105 	const RF_LayoutSW_t *lp;
    106 	int     rc;
    107 
    108 	lp = rbuf->raidPtr->Layout.map;
    109 	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
    110 	return (rc);
    111 }
    112 
    113 /*
    114  * rbuf          - the recon buffer to submit
    115  * keep_it       - whether we can keep this buffer or we have to return it
    116  * use_committed - whether to use a committed or an available recon buffer
    117  */
    118 int
    119 rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it,
    120 			  int use_committed)
    121 {
    122 	RF_Raid_t *raidPtr = rbuf->raidPtr;
    123 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    124 	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl;
    125 	RF_ReconParityStripeStatus_t *pssPtr;
    126 	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
    127 							 * pointers */
    128 	void *ta;		/* temporary data buffer pointer */
    129 	RF_CallbackValueDesc_t *cb, *p;
    130 	int     retcode = 0;
    131 
    132 	RF_Etimer_t timer;
    133 
    134 	/* makes no sense to have a submission from the failed disk */
    135 	RF_ASSERT(rbuf);
    136 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    137 
    138 	Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n",
    139 	    rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
    140 
    141 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
    142 
    143 	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
    144 	while(reconCtrlPtr->rb_lock) {
    145 		rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
    146 	}
    147 	reconCtrlPtr->rb_lock = 1;
    148 	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
    149 
    150 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL);
    151 	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
    152 				 * an rbuf for it */
    153 
    154 	/* check to see if enough buffers have accumulated to do an XOR.  If
    155 	 * so, there's no need to acquire a floating rbuf.  Before we can do
    156 	 * any XORing, we must have acquired a destination buffer.  If we
    157 	 * have, then we can go ahead and do the XOR if (1) including this
    158 	 * buffer, enough bufs have accumulated, or (2) this is the last
    159 	 * submission for this stripe. Otherwise, we have to go acquire a
    160 	 * floating rbuf. */
    161 
    162 	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    163 	if ((targetRbuf != NULL) &&
    164 	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
    165 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
    166 		Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount);
    167 		RF_ETIMER_START(timer);
    168 		rf_MultiWayReconXor(raidPtr, pssPtr);
    169 		RF_ETIMER_STOP(timer);
    170 		RF_ETIMER_EVAL(timer);
    171 		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
    172 		if (!keep_it) {
    173 #if RF_ACC_TRACE > 0
    174 			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
    175 			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    176 			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    177 			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    178 			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    179 			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    180 
    181 			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    182 #endif
    183 		}
    184 		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
    185 
    186 		/* if use_committed is on, we _must_ consume a buffer off the
    187 		 * committed list. */
    188 		if (use_committed) {
    189 			t = reconCtrlPtr->committedRbufs;
    190 			RF_ASSERT(t);
    191 			reconCtrlPtr->committedRbufs = t->next;
    192 			rf_ReleaseFloatingReconBuffer(raidPtr, t);
    193 		}
    194 		if (keep_it) {
    195 			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
    196 			rf_lock_mutex2(reconCtrlPtr->rb_mutex);
    197 			reconCtrlPtr->rb_lock = 0;
    198 			rf_broadcast_cond2(reconCtrlPtr->rb_cv);
    199 			rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
    200 			rf_FreeReconBuffer(rbuf);
    201 			return (retcode);
    202 		}
    203 		goto out;
    204 	}
    205 	/* set the value of "t", which we'll use as the rbuf from here on */
    206 	if (keep_it) {
    207 		t = rbuf;
    208 	} else {
    209 		if (use_committed) {	/* if a buffer has been committed to
    210 					 * us, use it */
    211 			t = reconCtrlPtr->committedRbufs;
    212 			RF_ASSERT(t);
    213 			reconCtrlPtr->committedRbufs = t->next;
    214 			t->next = NULL;
    215 		} else
    216 			if (reconCtrlPtr->floatingRbufs) {
    217 				t = reconCtrlPtr->floatingRbufs;
    218 				reconCtrlPtr->floatingRbufs = t->next;
    219 				t->next = NULL;
    220 			}
    221 	}
    222 
    223 	/* If we weren't able to acquire a buffer, append to the end of the
    224 	 * buf list in the recon ctrl struct. */
    225 	if (!t) {
    226 		RF_ASSERT(!keep_it && !use_committed);
    227 		Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col);
    228 
    229 		raidPtr->procsInBufWait++;
    230 		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
    231 			printf("Buffer wait deadlock detected.  Exiting.\n");
    232 			rf_PrintPSStatusTable(raidPtr);
    233 			RF_PANIC();
    234 		}
    235 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
    236 		cb = rf_AllocCallbackValueDesc(raidPtr); /* append to buf wait list in
    237 							  * recon ctrl structure */
    238 		cb->col = rbuf->col;
    239 		cb->v = rbuf->parityStripeID;
    240 		cb->next = NULL;
    241 		if (!reconCtrlPtr->bufferWaitList)
    242 			reconCtrlPtr->bufferWaitList = cb;
    243 		else {		/* might want to maintain head/tail pointers
    244 				 * here rather than search for end of list */
    245 			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
    246 			p->next = cb;
    247 		}
    248 		retcode = 1;
    249 		goto out;
    250 	}
    251 	Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col);
    252 #if RF_ACC_TRACE > 0
    253 	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    254 	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    255 	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    256 	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    257 	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    258 
    259 	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    260 #endif
    261 
    262 	/* initialize the buffer */
    263 	if (t != rbuf) {
    264 		t->col = reconCtrlPtr->fcol;
    265 		t->parityStripeID = rbuf->parityStripeID;
    266 		t->which_ru = rbuf->which_ru;
    267 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    268 		t->spCol = rbuf->spCol;
    269 		t->spOffset = rbuf->spOffset;
    270 
    271 		ta = t->buffer;
    272 		t->buffer = rbuf->buffer;
    273 		rbuf->buffer = ta;	/* swap buffers */
    274 	}
    275 	/* the first installation always gets installed as the destination
    276 	 * buffer. subsequent installations get stacked up to allow for
    277 	 * multi-way XOR */
    278 	if (!pssPtr->rbuf) {
    279 		pssPtr->rbuf = t;
    280 		t->count = 1;
    281 	} else
    282 		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */
    283 
    284 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
    285 											 * G=2 */
    286 
    287 out:
    288 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
    289 	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
    290 	reconCtrlPtr->rb_lock = 0;
    291 	rf_broadcast_cond2(reconCtrlPtr->rb_cv);
    292 	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
    293 	return (retcode);
    294 }
    295 /* pssPtr - the pss descriptor for this parity stripe */
    296 int
    297 rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr)
    298 {
    299 	int     i, numBufs = pssPtr->xorBufCount;
    300 	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
    301 	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
    302 	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    303 
    304 	RF_ASSERT(pssPtr->rbuf != NULL);
    305 	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
    306 #ifdef _KERNEL
    307 #ifndef __NetBSD__
    308 	thread_block();		/* yield the processor before doing a big XOR */
    309 #endif
    310 #endif				/* _KERNEL */
    311 	/*
    312          * XXX
    313          *
    314          * What if more than 9 bufs?
    315          */
    316 	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
    317 
    318 	/* release all the reconstruction buffers except the last one, which
    319 	 * belongs to the disk whose submission caused this XOR to take place */
    320 	for (i = 0; i < numBufs - 1; i++) {
    321 		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
    322 			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]);
    323 		else
    324 			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
    325 				rf_FreeReconBuffer(rbufs[i]);
    326 			else
    327 				RF_ASSERT(0);
    328 	}
    329 	targetRbuf->count += pssPtr->xorBufCount;
    330 	pssPtr->xorBufCount = 0;
    331 	return (0);
    332 }
    333 /* removes one full buffer from one of the full-buffer lists and returns it.
    334  *
    335  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
    336  */
    337 RF_ReconBuffer_t *
    338 rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
    339 {
    340 	RF_ReconBuffer_t *p;
    341 
    342 	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
    343 	while(reconCtrlPtr->rb_lock) {
    344 		rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
    345 	}
    346 	reconCtrlPtr->rb_lock = 1;
    347 	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
    348 
    349 	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
    350 		reconCtrlPtr->fullBufferList = p->next;
    351 		p->next = NULL;
    352 	}
    353 	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
    354 	reconCtrlPtr->rb_lock = 0;
    355 	rf_broadcast_cond2(reconCtrlPtr->rb_cv);
    356 	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
    357 	return (p);
    358 }
    359 
    360 
    361 /* if the reconstruction buffer is full, move it to the full list,
    362  * which is maintained sorted by failed disk sector offset
    363  *
    364  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
    365 int
    366 rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
    367 		    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
    368 {
    369 	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    370 
    371 	if (rbuf->count == numDataCol) {
    372 		raidPtr->numFullReconBuffers++;
    373 		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
    374 		    (long) rbuf->parityStripeID, rbuf->which_ru);
    375 		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
    376 			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
    377 			    (long) rbuf->parityStripeID, rbuf->which_ru);
    378 			rbuf->next = reconCtrl->fullBufferList;
    379 			reconCtrl->fullBufferList = rbuf;
    380 		} else {
    381 			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
    382 			rbuf->next = p;
    383 			pt->next = rbuf;
    384 			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
    385 			    (long) rbuf->parityStripeID, rbuf->which_ru);
    386 		}
    387 		rbuf->pssPtr = pssPtr;
    388 		pssPtr->rbuf = NULL;
    389 		rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY);
    390 	}
    391 	return (0);
    392 }
    393 
    394 
    395 /* release a floating recon buffer for someone else to use.
    396  * assumes the rb_mutex is LOCKED at entry
    397  */
    398 void
    399 rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf)
    400 {
    401 	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl;
    402 	RF_CallbackValueDesc_t *cb;
    403 
    404 	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
    405 	    (long) rbuf->parityStripeID, rbuf->which_ru);
    406 
    407 	/* if anyone is waiting on buffers, wake one of them up.  They will
    408 	 * subsequently wake up anyone else waiting on their RU */
    409 	if (rcPtr->bufferWaitList) {
    410 		rbuf->next = rcPtr->committedRbufs;
    411 		rcPtr->committedRbufs = rbuf;
    412 		cb = rcPtr->bufferWaitList;
    413 		rcPtr->bufferWaitList = cb->next;
    414 		rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
    415 												 * committed a buffer */
    416 		rf_FreeCallbackValueDesc(raidPtr, cb);
    417 		raidPtr->procsInBufWait--;
    418 	} else {
    419 		rbuf->next = rcPtr->floatingRbufs;
    420 		rcPtr->floatingRbufs = rbuf;
    421 	}
    422 }
    423