Home | History | Annotate | Line # | Download | only in raidframe
rf_reconbuffer.c revision 1.2
      1 /*	$NetBSD: rf_reconbuffer.c,v 1.2 1999/01/26 02:34:01 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /***************************************************
     30  *
     31  * rf_reconbuffer.c -- reconstruction buffer manager
     32  *
     33  ***************************************************/
     34 
     35 #include "rf_raid.h"
     36 #include "rf_reconbuffer.h"
     37 #include "rf_acctrace.h"
     38 #include "rf_etimer.h"
     39 #include "rf_general.h"
     40 #include "rf_debugprint.h"
     41 #include "rf_revent.h"
     42 #include "rf_reconutil.h"
     43 #include "rf_nwayxor.h"
     44 
     45 #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a)
     46 #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b)
     47 #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c)
     48 #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d)
     49 #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e)
     50 
     51 /*****************************************************************************************
     52  *
     53  * Submit a reconstruction buffer to the manager for XOR.
     54  * We can only submit a buffer if (1) we can xor into an existing buffer, which means
     55  * we don't have to acquire a new one, (2) we can acquire a floating
     56  * recon buffer, or (3) the caller has indicated that we are allowed to keep the
     57  * submitted buffer.
     58  *
     59  * Returns non-zero if and only if we were not able to submit.
     60  * In this case, we append the current disk ID to the wait list on the indicated
     61  * RU, so that it will be re-enabled when we acquire a buffer for this RU.
     62  *
     63  ****************************************************************************************/
     64 
     65 /* just to make the code below more readable */
     66 #define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_) \
     67   _cb_ = rf_AllocCallbackDesc();                    \
     68   (_cb_)->row = (_row_); (_cb_)->col = (_col_); (_cb_)->next = (_pssPtr_)->bufWaitList; (_pssPtr_)->bufWaitList = (_cb_);
     69 
     70 /*
     71  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
     72  * bufs into the accumulating sum.
     73  */
     74 static RF_VoidFuncPtr nWayXorFuncs[] = {
     75   NULL,
     76   (RF_VoidFuncPtr)rf_nWayXor1,
     77   (RF_VoidFuncPtr)rf_nWayXor2,
     78   (RF_VoidFuncPtr)rf_nWayXor3,
     79   (RF_VoidFuncPtr)rf_nWayXor4,
     80   (RF_VoidFuncPtr)rf_nWayXor5,
     81   (RF_VoidFuncPtr)rf_nWayXor6,
     82   (RF_VoidFuncPtr)rf_nWayXor7,
     83   (RF_VoidFuncPtr)rf_nWayXor8,
     84   (RF_VoidFuncPtr)rf_nWayXor9
     85 };
     86 
     87 int rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
     88   RF_ReconBuffer_t  *rbuf;          /* the recon buffer to submit */
     89   int                keep_it;       /* whether we can keep this buffer or we have to return it */
     90   int                use_committed; /* whether to use a committed or an available recon buffer */
     91 {
     92   RF_LayoutSW_t *lp;
     93   int rc;
     94 
     95   lp = rbuf->raidPtr->Layout.map;
     96   rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
     97   return(rc);
     98 }
     99 
    100 int rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
    101   RF_ReconBuffer_t  *rbuf;          /* the recon buffer to submit */
    102   int                keep_it;       /* whether we can keep this buffer or we have to return it */
    103   int                use_committed; /* whether to use a committed or an available recon buffer */
    104 {
    105   RF_Raid_t *raidPtr                = rbuf->raidPtr;
    106   RF_RaidLayout_t *layoutPtr        = &raidPtr->Layout;
    107   RF_ReconCtrl_t *reconCtrlPtr      = raidPtr->reconControl[rbuf->row];
    108   RF_ReconParityStripeStatus_t *pssPtr;
    109   RF_ReconBuffer_t *targetRbuf, *t = NULL;        /* temporary rbuf pointers */
    110   caddr_t ta;                                     /* temporary data buffer pointer */
    111   RF_CallbackDesc_t *cb, *p;
    112   int retcode = 0, created = 0;
    113 
    114   RF_Etimer_t timer;
    115 
    116   /* makes no sense to have a submission from the failed disk */
    117   RF_ASSERT(rbuf);
    118   RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    119 
    120   Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
    121 			       rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru, (long)rbuf->failedDiskSectorOffset);
    122 
    123   RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID);
    124 
    125   RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    126 
    127   pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    128   RF_ASSERT(pssPtr);  /* if it didn't exist, we wouldn't have gotten an rbuf for it */
    129 
    130   /* check to see if enough buffers have accumulated to do an XOR.  If so, there's no need to
    131    * acquire a floating rbuf.  Before we can do any XORing, we must have acquired a destination
    132    * buffer.  If we have, then we can go ahead and do the XOR if (1) including this buffer, enough
    133    * bufs have accumulated, or (2) this is the last submission for this stripe.
    134    * Otherwise, we have to go acquire a floating rbuf.
    135    */
    136 
    137   targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    138   if (  (targetRbuf != NULL) &&
    139        ((pssPtr->xorBufCount == rf_numBufsToAccumulate-1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol)) ) {
    140     pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = rbuf;          /* install this buffer */
    141     Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n",rbuf->row, rbuf->col,pssPtr->xorBufCount);
    142     RF_ETIMER_START(timer);
    143     rf_MultiWayReconXor(raidPtr, pssPtr);
    144     RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer);
    145     raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
    146     if (!keep_it) {
    147       raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
    148       RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    149       RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    150       raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    151         RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    152       RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    153 
    154       rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    155     }
    156     rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
    157 
    158     /* if use_committed is on, we _must_ consume a buffer off the committed list. */
    159     if (use_committed) {
    160       t = reconCtrlPtr->committedRbufs;
    161       RF_ASSERT(t);
    162       reconCtrlPtr->committedRbufs = t->next;
    163       rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
    164     }
    165     if (keep_it) {
    166       RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
    167       RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
    168       rf_FreeReconBuffer(rbuf);
    169       return(retcode);
    170     }
    171     goto out;
    172   }
    173 
    174   /* set the value of "t", which we'll use as the rbuf from here on */
    175   if (keep_it) {
    176     t = rbuf;
    177   }
    178   else {
    179     if (use_committed) {      /* if a buffer has been committed to us, use it */
    180       t = reconCtrlPtr->committedRbufs;
    181       RF_ASSERT(t);
    182       reconCtrlPtr->committedRbufs = t->next;
    183       t->next = NULL;
    184     } else if (reconCtrlPtr->floatingRbufs) {
    185       t = reconCtrlPtr->floatingRbufs;
    186       reconCtrlPtr->floatingRbufs = t->next;
    187       t->next = NULL;
    188     }
    189   }
    190 
    191   /* If we weren't able to acquire a buffer,
    192    * append to the end of the buf list in the recon ctrl struct.
    193    */
    194   if (!t) {
    195     RF_ASSERT(!keep_it && !use_committed);
    196     Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n",rbuf->row, rbuf->col);
    197 
    198     raidPtr->procsInBufWait++;
    199     if ( (raidPtr->procsInBufWait == raidPtr->numCol -1) && (raidPtr->numFullReconBuffers == 0)) {
    200       printf("Buffer wait deadlock detected.  Exiting.\n");
    201       rf_PrintPSStatusTable(raidPtr, rbuf->row);
    202       RF_PANIC();
    203     }
    204     pssPtr->flags |= RF_PSS_BUFFERWAIT;
    205     cb = rf_AllocCallbackDesc();                      /* append to buf wait list in recon ctrl structure */
    206     cb->row = rbuf->row; cb->col = rbuf->col;
    207     cb->callbackArg.v  = rbuf->parityStripeID;
    208     cb->callbackArg2.v = rbuf->which_ru;
    209     cb->next = NULL;
    210     if (!reconCtrlPtr->bufferWaitList) reconCtrlPtr->bufferWaitList = cb;
    211     else {       /* might want to maintain head/tail pointers here rather than search for end of list */
    212       for (p = reconCtrlPtr->bufferWaitList; p->next; p=p->next);
    213       p->next = cb;
    214     }
    215     retcode = 1;
    216     goto out;
    217   }
    218   Dprintf2("RECON: row %d col %d acquired rbuf\n",rbuf->row, rbuf->col);
    219   RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    220   RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    221   raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
    222     RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    223   RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
    224 
    225   rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
    226 
    227   /* initialize the buffer */
    228   if (t!=rbuf) {
    229     t->row = rbuf->row; t->col = reconCtrlPtr->fcol;
    230     t->parityStripeID = rbuf->parityStripeID;
    231     t->which_ru = rbuf->which_ru;
    232     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    233     t->spRow=rbuf->spRow;
    234     t->spCol=rbuf->spCol;
    235     t->spOffset=rbuf->spOffset;
    236 
    237     ta = t->buffer; t->buffer = rbuf->buffer; rbuf->buffer = ta;      /* swap buffers */
    238   }
    239 
    240   /* the first installation always gets installed as the destination buffer.
    241    * subsequent installations get stacked up to allow for multi-way XOR
    242    */
    243   if (!pssPtr->rbuf) {pssPtr->rbuf = t; t->count = 1;}
    244   else pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = t;          /* install this buffer */
    245 
    246   rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);      /* the buffer is full if G=2 */
    247 
    248 out:
    249   RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
    250   RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
    251   return(retcode);
    252 }
    253 
    254 int rf_MultiWayReconXor(raidPtr, pssPtr)
    255   RF_Raid_t                     *raidPtr;
    256   RF_ReconParityStripeStatus_t  *pssPtr;   /* the pss descriptor for this parity stripe */
    257 {
    258   int i, numBufs = pssPtr->xorBufCount;
    259   int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
    260   RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
    261   RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    262 
    263   RF_ASSERT(pssPtr->rbuf != NULL);
    264   RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
    265 #ifdef _KERNEL
    266 #ifndef __NetBSD__
    267   thread_block(); /* yield the processor before doing a big XOR */
    268 #endif
    269 #endif /* _KERNEL */
    270   /*
    271    * XXX
    272    *
    273    * What if more than 9 bufs?
    274    */
    275   nWayXorFuncs[numBufs](pssPtr->rbufsForXor, targetRbuf, numBytes/sizeof(long));
    276 
    277   /* release all the reconstruction buffers except the last one, which belongs to the
    278    * the disk who's submission caused this XOR to take place
    279    */
    280   for (i=0; i < numBufs-1; i++) {
    281     if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
    282     else if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbufs[i]);
    283     else RF_ASSERT(0);
    284   }
    285   targetRbuf->count += pssPtr->xorBufCount;
    286   pssPtr->xorBufCount = 0;
    287   return(0);
    288 }
    289 
    290 /* removes one full buffer from one of the full-buffer lists and returns it.
    291  *
    292  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
    293  */
    294 RF_ReconBuffer_t *rf_GetFullReconBuffer(reconCtrlPtr)
    295   RF_ReconCtrl_t  *reconCtrlPtr;
    296 {
    297   RF_ReconBuffer_t *p;
    298 
    299   RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    300 
    301   if ( (p=reconCtrlPtr->priorityList) != NULL) {
    302     reconCtrlPtr->priorityList = p->next;
    303     p->next = NULL;
    304     goto out;
    305   }
    306   if ( (p=reconCtrlPtr->fullBufferList) != NULL) {
    307     reconCtrlPtr->fullBufferList = p->next;
    308     p->next = NULL;
    309     goto out;
    310   }
    311 
    312 out:
    313   RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
    314   return(p);
    315 }
    316 
    317 
    318 /* if the reconstruction buffer is full, move it to the full list, which is maintained
    319  * sorted by failed disk sector offset
    320  *
    321  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
    322  */
    323 int rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
    324   RF_Raid_t                     *raidPtr;
    325   RF_ReconCtrl_t                *reconCtrl;
    326   RF_ReconParityStripeStatus_t  *pssPtr;
    327   int                            numDataCol;
    328 {
    329   RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
    330 
    331   if (rbuf->count == numDataCol) {
    332     raidPtr->numFullReconBuffers++;
    333     Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
    334 	     (long)rbuf->parityStripeID, rbuf->which_ru);
    335     if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
    336       Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
    337 	       (long)rbuf->parityStripeID, rbuf->which_ru);
    338       rbuf->next = reconCtrl->fullBufferList;
    339       reconCtrl->fullBufferList = rbuf;
    340     }
    341     else {
    342       for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt=p, p=p->next);
    343       rbuf->next = p;
    344       pt->next = rbuf;
    345       Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
    346 	       (long)rbuf->parityStripeID, rbuf->which_ru);
    347     }
    348 #if 0
    349     pssPtr->writeRbuf = pssPtr->rbuf;        /* DEBUG ONLY:  we like to be able to find this rbuf while it's awaiting write */
    350 #else
    351     rbuf->pssPtr = pssPtr;
    352 #endif
    353     pssPtr->rbuf = NULL;
    354     rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
    355   }
    356   return(0);
    357 }
    358 
    359 
    360 /* release a floating recon buffer for someone else to use.
    361  * assumes the rb_mutex is LOCKED at entry
    362  */
    363 void rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
    364   RF_Raid_t         *raidPtr;
    365   RF_RowCol_t        row;
    366   RF_ReconBuffer_t  *rbuf;
    367 {
    368   RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
    369   RF_CallbackDesc_t *cb;
    370 
    371   Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
    372 	   (long)rbuf->parityStripeID, rbuf->which_ru);
    373 
    374   /* if anyone is waiting on buffers, wake one of them up.  They will subsequently wake up anyone
    375    * else waiting on their RU
    376    */
    377   if (rcPtr->bufferWaitList) {
    378     rbuf->next = rcPtr->committedRbufs;
    379     rcPtr->committedRbufs = rbuf;
    380     cb = rcPtr->bufferWaitList;
    381     rcPtr->bufferWaitList = cb->next;
    382     rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);  /* arg==1 => we've committed a buffer */
    383     rf_FreeCallbackDesc(cb);
    384     raidPtr->procsInBufWait--;
    385   } else {
    386     rbuf->next = rcPtr->floatingRbufs;
    387     rcPtr->floatingRbufs = rbuf;
    388   }
    389 }
    390 
    391 /* release any disk that is waiting on a buffer for the indicated RU.
    392  * assumes the rb_mutex is LOCKED at entry
    393  */
    394 void rf_ReleaseBufferWaiters(raidPtr, pssPtr)
    395   RF_Raid_t                     *raidPtr;
    396   RF_ReconParityStripeStatus_t  *pssPtr;
    397 {
    398   RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
    399 
    400   Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n",
    401 	   (long)pssPtr->parityStripeID, pssPtr->which_ru);
    402   pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
    403   while (cb) {
    404     cb1 = cb->next;
    405     cb->next = NULL;
    406     rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR);  /* arg==0 => we haven't committed a buffer */
    407     rf_FreeCallbackDesc(cb);
    408     cb = cb1;
    409   }
    410   pssPtr->bufWaitList = NULL;
    411 }
    412 
    413 /* when reconstruction is forced on an RU, there may be some disks waiting to
    414  * acquire a buffer for that RU.  Since we allocate a new buffer as part of
    415  * the forced-reconstruction process, we no longer have to wait for any
    416  * buffers, so we wakeup any waiter that we find in the bufferWaitList
    417  *
    418  * assumes the rb_mutex is LOCKED at entry
    419  */
    420 void rf_ReleaseBufferWaiter(rcPtr, rbuf)
    421   RF_ReconCtrl_t    *rcPtr;
    422   RF_ReconBuffer_t  *rbuf;
    423 {
    424   RF_CallbackDesc_t *cb, *cbt;
    425 
    426   for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb=cb->next) {
    427     if ( (cb->callbackArg.v == rbuf->parityStripeID) && ( cb->callbackArg2.v == rbuf->which_ru)) {
    428       Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col);
    429       if (cbt) cbt->next = cb->next;
    430       else rcPtr->bufferWaitList = cb->next;
    431       rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);  /* arg==0 => no committed buffer */
    432       rf_FreeCallbackDesc(cb);
    433       return;
    434     }
    435   }
    436 }
    437