/*	$NetBSD: rf_diskqueue.c,v 1.55.4.1 2021/10/19 10:55:15 martin Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/****************************************************************************
 *
 * rf_diskqueue.c -- higher-level disk queue code
 *
 * The routines here are a generic wrapper around the actual queueing
 * routines.  The code here implements thread scheduling, synchronization,
 * and locking ops (see below) on top of the lower-level queueing code.
 *
 * To support atomic RMW, we implement "locking operations".  When a
 * locking op is dispatched to the lower levels of the driver, the
 * queue is locked, and no further I/Os are dispatched until the queue
 * receives and completes a corresponding "unlocking operation".  This
 * code relies on the higher layers to guarantee that a locking op is
 * always eventually followed by an unlocking op.  The model is that
 * the higher layers are structured so that locking and unlocking ops
 * occur in pairs, i.e. an unlocking op cannot be generated until after
 * a locking op reports completion.  There is no good way to check that
 * an unlocking op "corresponds" to the op that currently has the queue
 * locked, so we make no such attempt.  Since by definition there can
 * be only one locking op outstanding on a disk, this should not be a
 * problem.
 *
 * In the kernel, we allow multiple I/Os to be concurrently dispatched
 * to the disk driver.  To support locking ops in this environment,
 * when we decide to do a locking op, we stop dispatching new I/Os and
 * wait until all dispatched I/Os have completed before dispatching
 * the locking op.
 *
 * Unfortunately, the code differs across the three operating
 * environments (user level, kernel, simulator).  In the kernel, I/O
 * is non-blocking, and we have no disk threads to dispatch for us.
 * Therefore, we have to dispatch new I/Os to the scsi driver at the
 * time of enqueue, and also at the time of completion.  At user
 * level, I/O is blocking, and so only the disk threads may dispatch
 * I/Os.  Thus at user level, all we can do at enqueue time is enqueue
 * and wake up the disk thread to do the dispatch.
 *
 ****************************************************************************/
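
/*
 * Illustrative sketch (not compiled) of the locking-op pairing contract
 * described above, as a higher layer might obey it.  The helpers
 * make_lock_req(), make_rmw_req() and make_unlock_req() are hypothetical
 * stand-ins for higher-layer code, not part of RAIDframe.
 */
#if 0
static void
example_atomic_rmw(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_DiskQueue_t *q = &raidPtr->Queues[col];

	/* 1. Dispatch the locking op; once it reaches the lower levels,
	 *    the queue stops issuing further I/Os. */
	rf_DiskIOEnqueue(q, make_lock_req(), RF_IO_NORMAL_PRIORITY);

	/* 2. After the locking op reports completion, issue the
	 *    read-modify-write I/O itself. */
	rf_DiskIOEnqueue(q, make_rmw_req(), RF_IO_NORMAL_PRIORITY);

	/* 3. The matching unlocking op re-opens the queue.  The higher
	 *    layers guarantee this always eventually happens. */
	rf_DiskIOEnqueue(q, make_unlock_req(), RF_IO_NORMAL_PRIORITY);
}
#endif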

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.55.4.1 2021/10/19 10:55:15 martin Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_threadstuff.h"
#include "rf_raid.h"
#include "rf_diskqueue.h"
#include "rf_alloclist.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugprint.h"
#include "rf_shutdown.h"
#include "rf_cvscan.h"
#include "rf_sstf.h"
#include "rf_fifo.h"
#include "rf_kintf.h"

#include <sys/buf.h>

static void rf_ShutdownDiskQueueSystem(void *);

#ifndef RF_DEBUG_DISKQUEUE
#define RF_DEBUG_DISKQUEUE 0
#endif

#if RF_DEBUG_DISKQUEUE
#define Dprintf1(s,a)         if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)       if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c)     if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
#else
#define Dprintf1(s,a)
#define Dprintf2(s,a,b)
#define Dprintf3(s,a,b,c)
#endif

/*****************************************************************************
 *
 * The disk queue switch defines all the functions used by the different
 * queueing disciplines: the queue type name and the create, enqueue,
 * dequeue, peek, and promote routines.
 *
 ****************************************************************************/

static const RF_DiskQueueSW_t diskqueuesw[] = {
	{"fifo",		/* FIFO */
		rf_FifoCreate,
		rf_FifoEnqueue,
		rf_FifoDequeue,
		rf_FifoPeek,
		rf_FifoPromote},

	{"cvscan",		/* cvscan */
		rf_CvscanCreate,
		rf_CvscanEnqueue,
		rf_CvscanDequeue,
		rf_CvscanPeek,
		rf_CvscanPromote},

	{"sstf",		/* shortest seek time first */
		rf_SstfCreate,
		rf_SstfEnqueue,
		rf_SstfDequeue,
		rf_SstfPeek,
		rf_SstfPromote},

	{"scan",		/* SCAN (two-way elevator) */
		rf_ScanCreate,
		rf_SstfEnqueue,
		rf_ScanDequeue,
		rf_ScanPeek,
		rf_SstfPromote},

	{"cscan",		/* CSCAN (one-way elevator) */
		rf_CscanCreate,
		rf_SstfEnqueue,
		rf_CscanDequeue,
		rf_CscanPeek,
		rf_SstfPromote},
};
#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
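
/*
 * A minimal sketch (not compiled) of how a discipline is chosen from this
 * switch by name; rf_ConfigureDiskQueues() below performs the same lookup,
 * falling back to FIFO for unknown names.
 */
#if 0
static const RF_DiskQueueSW_t *
example_find_discipline(const char *name)
{
	unsigned int i;

	for (i = 0; i < NUM_DISK_QUEUE_TYPES; i++)
		if (!strcmp(diskqueuesw[i].queueType, name))
			return (&diskqueuesw[i]);
	return (&diskqueuesw[0]);	/* default to FIFO */
}
#endif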

#define RF_MAX_FREE_DQD 256
#define RF_MIN_FREE_DQD  64

/* XXX: scale these... */
#define RF_MAX_FREE_BUFIO 256
#define RF_MIN_FREE_BUFIO  64


static void
rf_ShutdownDiskQueue(void *arg)
{
	RF_DiskQueue_t *diskqueue = arg;

	rf_destroy_mutex2(diskqueue->mutex);
}

/* Configure a single disk queue. */
int
rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue,
		      RF_RowCol_t c, const RF_DiskQueueSW_t *p,
		      RF_SectorCount_t sectPerDisk, dev_t dev,
		      int maxOutstanding, RF_ShutdownList_t **listp,
		      RF_AllocListElem_t *clList)
{
	diskqueue->col = c;
	diskqueue->qPtr = p;
	diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp);
	diskqueue->dev = dev;
	diskqueue->numOutstanding = 0;
	diskqueue->queueLength = 0;
	diskqueue->maxOutstanding = maxOutstanding;
	diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
	diskqueue->flags = 0;
	diskqueue->raidPtr = raidPtr;
	diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c];
	rf_init_mutex2(diskqueue->mutex, IPL_VM);
	rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue);
	return (0);
}

static void
rf_ShutdownDiskQueueSystem(void *ignored)
{
	pool_destroy(&rf_pools.dqd);
	pool_destroy(&rf_pools.bufio);
}

int
rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp)
{

	rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t),
		     "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD);
	rf_pool_init(&rf_pools.bufio, sizeof(buf_t),
		     "rf_bufio_pl", RF_MIN_FREE_BUFIO, RF_MAX_FREE_BUFIO);
	rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);

	return (0);
}

int
rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
		       RF_Config_t *cfgPtr)
{
	RF_DiskQueue_t *diskQueues, *spareQueues;
	const RF_DiskQueueSW_t *p;
	RF_RowCol_t r, c;
	int     rc, i;

	raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;

	for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) {
		if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
			p = &diskqueuesw[i];
			break;
		}
	}
	if (p == NULL) {
		RF_ERRORMSG2("Unknown queue type \"%s\".  Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType);
		p = &diskqueuesw[0];
	}
	raidPtr->qType = p;

	diskQueues = RF_MallocAndAdd(
	    (raidPtr->numCol + RF_MAXSPARE) * sizeof(*diskQueues),
	    raidPtr->cleanupList);
	if (diskQueues == NULL)
		return (ENOMEM);
	raidPtr->Queues = diskQueues;

	for (c = 0; c < raidPtr->numCol; c++) {
		rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c],
					   c, p,
					   raidPtr->sectorsPerDisk,
					   raidPtr->Disks[c].dev,
					   cfgPtr->maxOutstandingDiskReqs,
					   listp, raidPtr->cleanupList);
		if (rc)
			return (rc);
	}

	spareQueues = &raidPtr->Queues[raidPtr->numCol];
	for (r = 0; r < raidPtr->numSpare; r++) {
		rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
					   raidPtr->numCol + r, p,
					   raidPtr->sectorsPerDisk,
					   raidPtr->Disks[raidPtr->numCol + r].dev,
					   cfgPtr->maxOutstandingDiskReqs, listp,
					   raidPtr->cleanupList);
		if (rc)
			return (rc);
	}
	return (0);
}

/* Enqueue a disk I/O
 *
 * In the kernel, I/O is non-blocking, so we'd like to have multiple
 * I/Os outstanding on the physical disks whenever possible.
 *
 * When any request arrives at a queue, we have two choices:
 *    dispatch it to the lower levels
 *    queue it up
 *
 * Kernel rules for when to do what (a caller-side sketch follows
 * rf_DiskIOEnqueue below):
 *    unlocking req  :  always dispatch it
 *    normal req     :  queue empty => dispatch it & set priority
 *                      queue not full & priority is ok => dispatch it
 *                      else queue it
 */
void
rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri)
{
	RF_ETIMER_START(req->qtime);
	RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
	req->priority = pri;

#if RF_DEBUG_DISKQUEUE
	if (rf_queueDebug && (req->numSector == 0)) {
		printf("Warning: Enqueueing zero-sector access\n");
	}
#endif
	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
	if (RF_OK_TO_DISPATCH(queue, req)) {
		Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col);
		rf_DispatchKernelIO(queue, req);
	} else {
		queue->queueLength++;	/* increment count of number of requests waiting in this queue */
		Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col);
		req->queue = (void *) queue;
		(queue->qPtr->Enqueue) (queue->qHdr, req, pri);
	}
	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
}
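
/*
 * A minimal caller-side sketch (not compiled) of building and enqueueing
 * a request.  startSect, numSect, databuf, parityStripeID, which_ru,
 * my_wakeup_func, my_arg and b_proc are hypothetical; rf_CreateDiskQueueData()
 * is defined later in this file.
 */
#if 0
static void
example_submit_read(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_DiskQueueData_t *dqd;

	dqd = rf_CreateDiskQueueData(RF_IO_TYPE_READ, startSect, numSect,
	    databuf, parityStripeID, which_ru, my_wakeup_func, my_arg,
	    NULL, raidPtr, 0, b_proc, PR_WAITOK);
	rf_DiskIOEnqueue(&raidPtr->Queues[col], dqd, RF_IO_NORMAL_PRIORITY);
}
#endif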

/* get the next set of I/Os started */
void
rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status)
{
	int     done = 0;

	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
	queue->numOutstanding--;
	RF_ASSERT(queue->numOutstanding >= 0);

	/* Dispatch requests to the disk until we find one that we can't.
	 * There is no reason to continue once we've filled up the queue,
	 * and no reason to even start if the queue is locked. */

	while (!done && !RF_QUEUE_FULL(queue)) {
		req = (queue->qPtr->Dequeue) (queue->qHdr);
		if (req) {
			Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col);
			queue->queueLength--;	/* decrement count of number of requests waiting in this queue */
			RF_ASSERT(queue->queueLength >= 0);
			if (RF_OK_TO_DISPATCH(queue, req)) {
				Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col);
				rf_DispatchKernelIO(queue, req);
			} else {
				/* We can't dispatch it, so just re-enqueue it.
				   This is potential trouble if disk queues batch reqs. */
				Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col);
				queue->queueLength++;
				(queue->qPtr->Enqueue) (queue->qHdr, req, req->priority);
				done = 1;
			}
		} else {
			Dprintf1("DiskIOComplete: no more requests to extract.\n", "");
			done = 1;
		}
	}

	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
}

/* Promotes accesses tagged with the given parityStripeID from low priority
 * to normal priority.  This promotion is optional, meaning that a queue
 * need not implement it.  If there is no promotion routine associated with
 * a queue, this routine does nothing and returns -1.
 */
int
rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
		 RF_ReconUnitNum_t which_ru)
{
	int     retval;

	if (!queue->qPtr->Promote)
		return (-1);
	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
	retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru);
	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
	return (retval);
}
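
/*
 * Illustrative use of the promotion hook (not compiled): reconstruction
 * code can raise previously low-priority accesses for a parity stripe to
 * normal priority.  The function and its arguments are hypothetical.
 */
#if 0
static void
example_promote(RF_Raid_t *raidPtr, RF_RowCol_t col,
    RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru)
{
	if (rf_DiskIOPromote(&raidPtr->Queues[col], psid, which_ru) < 0) {
		/* this discipline has no Promote routine; nothing was
		   promoted, and callers must tolerate that */
	}
}
#endif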

RF_DiskQueueData_t *
rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
		       RF_SectorCount_t nsect, void *bf,
		       RF_StripeNum_t parityStripeID,
		       RF_ReconUnitNum_t which_ru,
		       int (*wakeF) (void *, int), void *arg,
		       RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr,
		       RF_DiskQueueDataFlags_t flags, void *kb_proc,
		       int waitflag)
{
	RF_DiskQueueData_t *p;

	p = pool_get(&rf_pools.dqd, PR_WAITOK | PR_ZERO);
	KASSERT(p != NULL);

	/* Obtain a buffer from our own pool.  It is possible for the
	   regular getiobuf() to run out of memory and return NULL.
	   We need to guarantee that never happens, as RAIDframe
	   doesn't have a good way to recover if memory allocation
	   fails here.
	*/
	p->bp = pool_get(&rf_pools.bufio, PR_WAITOK | PR_ZERO);
	KASSERT(p->bp != NULL);

	buf_init(p->bp);

	SET(p->bp->b_cflags, BC_BUSY);	/* mark buffer busy */

	p->sectorOffset = ssect + rf_protectedSectors;
	p->numSector = nsect;
	p->type = typ;
	p->buf = bf;
	p->parityStripeID = parityStripeID;
	p->which_ru = which_ru;
	p->CompleteFunc = wakeF;
	p->argument = arg;
	p->next = NULL;
	p->tracerec = tracerec;
	p->priority = RF_IO_NORMAL_PRIORITY;
	p->raidPtr = raidPtr;
	p->flags = flags;
	p->b_proc = kb_proc;
	return (p);
}

void
rf_FreeDiskQueueData(RF_DiskQueueData_t *p)
{
	pool_put(&rf_pools.bufio, p->bp);
	pool_put(&rf_pools.dqd, p);
}
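
/*
 * A sketch (not compiled) of the expected request lifecycle, assuming the
 * completion function passed as wakeF to rf_CreateDiskQueueData() is handed
 * something that lets it find the request again.  my_wakeup_func and the
 * convention that arg points back at the request are illustrative only.
 */
#if 0
static int
my_wakeup_func(void *arg, int status)
{
	RF_DiskQueueData_t *dqd = arg;

	/* ... record status, wake whoever is waiting on this I/O ... */

	/* return the struct buf and the request to their pools */
	rf_FreeDiskQueueData(dqd);
	return (0);
}
#endif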