Home | History | Annotate | Line # | Download | only in raidframe
rf_raid.h revision 1.1
      1 /*	$NetBSD: rf_raid.h,v 1.1 1998/11/13 04:20:32 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /**********************************************
     30  * rf_raid.h -- main header file for RAID driver
     31  **********************************************/
     32 
     33 /*
     34  * :
     35  * Log: rf_raid.h,v
     36  * Revision 1.48  1996/08/20 22:33:54  jimz
     37  * make hist_diskreq a doubly-indexed array
     38  *
     39  * Revision 1.47  1996/07/15  05:40:41  jimz
     40  * some recon datastructure cleanup
     41  * better handling of multiple failures
     42  * added undocumented double-recon test
     43  *
     44  * Revision 1.46  1996/07/10  22:28:51  jimz
     45  * get rid of obsolete row statuses (dead,degraded2)
     46  *
     47  * Revision 1.45  1996/06/14  14:56:29  jimz
     48  * make engine threading stuff ifndef SIMULATE
     49  *
     50  * Revision 1.44  1996/06/14  14:16:54  jimz
     51  * move in engine node queue, atomicity control
     52  *
     53  * Revision 1.43  1996/06/12  04:41:26  jimz
     54  * tweaks to make genplot work with user-level driver
     55  * (mainly change stat collection)
     56  *
     57  * Revision 1.42  1996/06/11  10:57:17  jimz
     58  * add recon_done_procs, recon_done_proc_mutex
     59  *
     60  * Revision 1.41  1996/06/11  01:26:48  jimz
     61  * added mechanism for user-level to sync diskthread startup,
     62  * shutdown
     63  *
     64  * Revision 1.40  1996/06/10  14:18:58  jimz
     65  * move user, throughput stats into per-array structure
     66  *
     67  * Revision 1.39  1996/06/10  11:55:47  jimz
     68  * Straightened out some per-array/not-per-array distinctions, fixed
     69  * a couple bugs related to confusion. Added shutdown lists. Removed
     70  * layout shutdown function (now subsumed by shutdown lists).
     71  *
     72  * Revision 1.38  1996/06/07  21:33:04  jimz
     73  * begin using consistent types for sector numbers,
     74  * stripe numbers, row+col numbers, recon unit numbers
     75  *
     76  * Revision 1.37  1996/06/05  19:38:32  jimz
     77  * fixed up disk queueing types config
     78  * added sstf disk queueing
     79  * fixed exit bug on diskthreads (ref-ing bad mem)
     80  *
     81  * Revision 1.36  1996/06/05  18:06:02  jimz
     82  * Major code cleanup. The Great Renaming is now done.
     83  * Better modularity. Better typing. Fixed a bunch of
     84  * synchronization bugs. Made a lot of global stuff
     85  * per-desc or per-array. Removed dead code.
     86  *
     87  * Revision 1.35  1996/06/03  23:28:26  jimz
     88  * more bugfixes
     89  * check in tree to sync for IPDS runs with current bugfixes
     90  * there still may be a problem with threads in the script test
     91  * getting I/Os stuck- not trivially reproducible (runs ~50 times
     92  * in a row without getting stuck)
     93  *
     94  * Revision 1.34  1996/06/02  17:31:48  jimz
     95  * Moved a lot of global stuff into array structure, where it belongs.
     96  * Fixed up paritylogging, pss modules in this manner. Some general
     97  * code cleanup. Removed lots of dead code, some dead files.
     98  *
     99  * Revision 1.33  1996/05/30  23:22:16  jimz
    100  * bugfixes of serialization, timing problems
    101  * more cleanup
    102  *
    103  * Revision 1.32  1996/05/30  11:29:41  jimz
    104  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
    105  * about when stripes should be locked (I made it consistent: no parity, no lock)
    106  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    107  * it was to calculate values for the cache code, which is no longer with us.
    108  * More types, function, macro cleanup. Added code to properly quiesce the array
    109  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    110  * before. Fixed memory allocation, freeing bugs.
    111  *
    112  * Revision 1.31  1996/05/27  18:56:37  jimz
    113  * more code cleanup
    114  * better typing
    115  * compiles in all 3 environments
    116  *
    117  * Revision 1.30  1996/05/24  22:17:04  jimz
    118  * continue code + namespace cleanup
    119  * typed a bunch of flags
    120  *
    121  * Revision 1.29  1996/05/23  21:46:35  jimz
    122  * checkpoint in code cleanup (release prep)
    123  * lots of types, function names have been fixed
    124  *
    125  * Revision 1.28  1996/05/23  00:33:23  jimz
    126  * code cleanup: move all debug decls to rf_options.c, all extern
    127  * debug decls to rf_options.h, all debug vars preceded by rf_
    128  *
    129  * Revision 1.27  1996/05/18  19:51:34  jimz
    130  * major code cleanup- fix syntax, make some types consistent,
    131  * add prototypes, clean out dead code, et cetera
    132  *
    133  * Revision 1.26  1996/05/08  21:01:24  jimz
    134  * fixed up enum type names that were conflicting with other
    135  * enums and function names (ie, "panic")
    136  * future naming trends will be towards RF_ and rf_ for
    137  * everything raidframe-related
    138  *
    139  * Revision 1.25  1996/05/02  14:57:55  jimz
    140  * add sectorMask
    141  *
    142  * Revision 1.24  1996/04/22  15:53:13  jimz
    143  * MAX_RAIDS -> NRAIDFRAME
    144  *
    145  * Revision 1.23  1995/12/14  18:39:46  jimz
    146  * convert to rf_types.h types
    147  *
    148  * Revision 1.22  1995/12/06  15:02:26  root
    149  * added copyright info
    150  *
    151  * Revision 1.21  1995/10/09  17:39:24  jimz
    152  * added info for tracking number of outstanding accesses
    153  * at user-level
    154  *
    155  * Revision 1.20  1995/09/30  20:37:46  jimz
    156  * added acc_totals to Raid for kernel
    157  *
    158  * Revision 1.19  1995/09/19  22:57:14  jimz
    159  * add cache of raidid for kernel
    160  *
    161  * Revision 1.18  1995/09/18  16:50:04  jimz
    162  * added RF_MAX_DISKS (for config ioctls)
    163  *
    164  * Revision 1.17  1995/09/07  19:02:31  jimz
    165  * mods to get raidframe to compile and link
    166  * in kernel environment
    167  *
    168  * Revision 1.16  1995/07/21  19:29:51  robby
    169  * added some info for the idler to the Raid
    170  *
    171  * Revision 1.15  1995/07/16  03:19:14  cfb
    172  * added cachePtr to *raidPtr
    173  *
    174  * Revision 1.14  1995/06/23  13:39:36  robby
    175  * updated to prototypes in rf_layout.h
    176  *
    177  */
    178 
    179 #ifndef _RF__RF_RAID_H_
    180 #define _RF__RF_RAID_H_
    181 
    182 #ifdef _KERNEL
    183 #define KERNEL
    184 #endif
    185 
    186 #include "rf_archs.h"
    187 #include "rf_types.h"
    188 #include "rf_threadstuff.h"
    189 
    190 #if defined(__NetBSD__) && defined(_KERNEL)
    191 #include "rf_netbsd.h"
    192 #endif
    193 
    194 #ifdef KERNEL
    195 /* XXX Needs to be added.  GO
    196 #include <raidframe.h>
    197 */
    198 #include <sys/disklabel.h>
    199 #else /* KERNEL */
    200 #include <stdio.h>
    201 #include <assert.h>
    202 #endif /* KERNEL */
    203 #include <sys/types.h>
    204 
    205 #include "rf_alloclist.h"
    206 #include "rf_stripelocks.h"
    207 #include "rf_layout.h"
    208 #include "rf_disks.h"
    209 #include "rf_debugMem.h"
    210 #include "rf_diskqueue.h"
    211 #include "rf_reconstruct.h"
    212 #include "rf_acctrace.h"
    213 
    214 #if RF_INCLUDE_PARITYLOGGING > 0
    215 #include "rf_paritylog.h"
    216 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    217 
    218 #define RF_MAX_DISKS 128 /* max disks per array (the revision log says it was added for the config ioctls) */
    219 #ifdef __NetBSD__
    220 #define RF_DEV2RAIDID(_dev)  (DISKUNIT(_dev))     /* convert dev_t to raid id: the raid id is DISKUNIT() of the device */
    221 #else
    222 #define RF_DEV2RAIDID(_dev)  (minor(_dev)>>6)     /* convert dev_t to raid id: the minor number with its low 6 bits dropped */
    223 #endif /* __NetBSD__ */
    224 
    225 /*
    226  * Each row in the array is a distinct parity group, so
    227  * each has its own status, which is one of the following.
    228  */
    229 typedef enum RF_RowStatus_e {
    230   rf_rs_optimal,          /* implicit value 0 */
    231   rf_rs_degraded,         /* implicit value 1 */
    232   rf_rs_reconstructing,   /* implicit value 2 */
    233   rf_rs_reconfigured      /* implicit value 3 */
    234 } RF_RowStatus_t; /* per the revision log, the obsolete statuses (dead, degraded2) were removed in rev 1.46 */
    235 
    236 struct RF_CumulativeStats_s { /* cumulative user-I/O statistics; presumably the struct behind RF_CumulativeStats_t (typedef not visible in this file) */
    237   struct timeval start;     /* the time when the stats were last started */
    238   struct timeval stop;      /* the time when the stats were last stopped */
    239   long sum_io_us;           /* sum of all user response times (us) */
    240   long num_ios;             /* total number of I/Os serviced */
    241   long num_sect_moved;      /* total number of sectors read or written */
    242 };
    243 
    244 struct RF_ThroughputStats_s { /* throughput statistics; presumably the struct behind RF_ThroughputStats_t (typedef not visible in this file) */
    245   RF_DECLARE_MUTEX(mutex)/* a mutex used to lock the configuration stuff -- NOTE(review): wording looks copied from elsewhere; presumably guards the fields below, confirm. No ';' follows the macro, so it is expected to expand to a complete declaration */
    246   struct timeval start;  /* timer started when numOutstandingRequests moves from 0 to 1 */
    247   struct timeval stop;   /* timer stopped when numOutstandingRequests moves from 1 to 0 */
    248   RF_uint64 sum_io_us;   /* total time timer is enabled */
    249   RF_uint64 num_ios;     /* total number of ios processed by RAIDframe */
    250   long num_out_ios;      /* number of outstanding ios (presumably the "numOutstandingRequests" mentioned above -- confirm) */
    251 };
    252 
    253 #ifdef SIMULATE /* this type exists only when SIMULATE is defined */
    254 typedef struct RF_PendingRecon_s RF_PendingRecon_t; /* declared first so the struct can point to its own type */
    255 struct RF_PendingRecon_s { /* singly linked list node holding one (row, col) pair; RF_Raid_s.pendingRecon points at this type */
    256   RF_RowCol_t         row;   /* row of the entry (presumably a disk awaiting reconstruction -- confirm) */
    257   RF_RowCol_t         col;   /* column of the entry */
    258   RF_PendingRecon_t  *next;  /* next node in the list */
    259 };
    260 #endif /* SIMULATE */
    261 
    262 struct RF_Raid_s { /* per-array state of the RAID driver; fields are grouped by the section comments below */
    263   /* This portion never changes, and can be accessed without locking */
    264   /* an exception is Disks[][].status, which requires locking when it is changed */
    265   u_int numRow;             /* number of rows of disks, typically == # of ranks */
    266   u_int numCol;             /* number of columns of disks, typically == # of disks/rank */
    267   u_int numSpare;           /* number of spare disks */
    268   int   maxQueueDepth;      /* max disk queue depth */
    269   RF_SectorCount_t  totalSectors;   /* total number of sectors in the array */
    270   RF_SectorCount_t  sectorsPerDisk; /* number of sectors on each disk */
    271   u_int logBytesPerSector;  /* base-2 log of the number of bytes in a sector */
    272   u_int bytesPerSector;     /* bytes in a sector */
    273   RF_int32  sectorMask;     /* mask of bytes-per-sector */
    274 
    275   RF_RaidLayout_t   Layout; /* all information related to layout */
    276   RF_RaidDisk_t   **Disks;  /* all information related to physical disks */
    277   RF_DiskQueue_t  **Queues; /* all information related to disk queues */
    278      /* NOTE:  This is an anchor point via which the queues can be accessed,
    279       * but the enqueue/dequeue routines in diskqueue.c use a local copy of
    280       * this pointer for the actual accesses.
    281       */
    282   /* The remainder of the structure can change, and therefore requires locking on reads and updates */
    283   RF_DECLARE_MUTEX(mutex)        /* mutex used to serialize access to the fields below */
    284   RF_RowStatus_t  *status;       /* the status of each row in the array */
    285   int              valid;        /* indicates successful configuration */
    286   RF_LockTableEntry_t *lockTable;   /* stripe-lock table */
    287   RF_LockTableEntry_t *quiesceLock; /* quiescence table */
    288   int                  numFailures; /* total number of failures in the array */
    289 
    290   /*
    291    * Cleanup stuff
    292    */
    293   RF_ShutdownList_t  *shutdownList; /* shutdown activities */
    294   RF_AllocListElem_t *cleanupList;  /* memory to be freed at shutdown time */
    295 
    296   /*
    297    * Recon stuff
    298    */
    299   RF_HeadSepLimit_t headSepLimit;
    300   int numFloatingReconBufs;
    301   int reconInProgress;
    302 #ifdef SIMULATE
    303   RF_PendingRecon_t *pendingRecon; /* head of the RF_PendingRecon_t list (SIMULATE builds only) */
    304 #endif /* SIMULATE */
    305   RF_DECLARE_COND(waitForReconCond)
    306   RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */
    307   RF_ReconCtrl_t **reconControl; /* reconstruction control structure pointers for each row in the array */
    308 
    309 #if !defined(KERNEL) && !defined(SIMULATE)
    310   /*
    311    * Disk thread stuff
    312    */
    313   int diskthreads_created;
    314   int diskthreads_running;
    315   int diskthreads_shutdown;
    316   RF_DECLARE_MUTEX(diskthread_count_mutex)
    317   RF_DECLARE_COND(diskthread_count_cond)
    318 #endif /* !KERNEL && !SIMULATE */
    319 
    320   /*
    321    * Array-quiescence stuff
    322    */
    323   RF_DECLARE_MUTEX(access_suspend_mutex)
    324   RF_DECLARE_COND(quiescent_cond)
    325   RF_IoCount_t accesses_suspended;
    326   RF_IoCount_t accs_in_flight;
    327   int access_suspend_release;
    328   int waiting_for_quiescence;
    329   RF_CallbackDesc_t *quiesce_wait_list;
    330 
    331   /*
    332    * Statistics
    333    */
    334 #if !defined(KERNEL) && !defined(SIMULATE)
    335   RF_ThroughputStats_t throughputstats;
    336 #endif /* !KERNEL && !SIMULATE */
    337   RF_CumulativeStats_t userstats;
    338 
    339   /*
    340    * Engine thread control
    341    */
    342   RF_DECLARE_MUTEX(node_queue_mutex)
    343   RF_DECLARE_COND(node_queue_cond)
    344   RF_DagNode_t *node_queue;
    345 #ifndef SIMULATE
    346   RF_Thread_t engine_thread;
    347   RF_ThreadGroup_t engine_tg;
    348 #endif /* !SIMULATE */
    349   int shutdown_engine;
    350   int dags_in_flight; /* debug */
    351 
    352   /*
    353    * PSS (Parity Stripe Status) stuff
    354    */
    355   RF_FreeList_t *pss_freelist;
    356   long pssTableSize;
    357 
    358   /*
    359    * Reconstruction stuff
    360    */
    361   int procsInBufWait;
    362   int numFullReconBuffers;
    363   RF_AccTraceEntry_t *recon_tracerecs;
    364   unsigned long accumXorTimeUs;
    365   RF_ReconDoneProc_t *recon_done_procs;
    366   RF_DECLARE_MUTEX(recon_done_proc_mutex)
    367 
    368 #if !defined(KERNEL) && !defined(SIMULATE)
    369   RF_Thread_t **diskthreads, *sparediskthreads;  /* thread descriptors for disk threads in user-level version */
    370 #endif /* !KERNEL && !SIMULATE */
    371 
    372   /*
    373    * nAccOutstanding, waitShutdown protected by desc freelist lock
    374    * (This may seem strange, since that's a central serialization point
    375    * for a per-array piece of data, but otherwise, it'd be an extra
    376    * per-array lock, and that'd only be less efficient...)
    377    */
    378   RF_DECLARE_COND(outstandingCond)
    379   int waitShutdown;
    380   int nAccOutstanding;
    381 
    382   RF_DiskId_t **diskids;
    383   RF_DiskId_t  *sparediskids;
    384 
    385 #ifdef KERNEL
    386 	int           raidid; /* cached raid id (the revision log describes it as a cache of raidid for the kernel) */
    387 #endif /* KERNEL */
    388 	RF_AccTotals_t  acc_totals;
    389 	int           keep_acc_totals;
    390 
    391 #ifdef _KERNEL /* this header defines KERNEL whenever _KERNEL is defined, so these fields imply the KERNEL ones above */
    392         struct raidcinfo **raid_cinfo; /* array of component info */
    393         struct proc *proc; /* XXX shouldn't be needed here.. :-p */
    394 #endif /* _KERNEL */
    395 
    396   int terminate_disk_queues;
    397 
    398   /*
    399    * XXX
    400    *
    401    * config-specific information should be moved
    402    * somewhere else, or at least hung off this
    403    * in some generic way
    404    */
    405 
    406   /* used by rf_compute_workload_shift */
    407   RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; /* doubly-indexed array; presumably [row][col] -- confirm against the user */
    408 
    409   /* used by declustering */
    410   int noRotate;
    411 
    412 #if RF_INCLUDE_PARITYLOGGING > 0
    413   /* used by parity logging */
    414   RF_SectorCount_t          regionLogCapacity;
    415   RF_ParityLogQueue_t       parityLogPool;       /* pool of unused parity logs */
    416   RF_RegionInfo_t          *regionInfo;          /* array of region state */
    417   int                       numParityLogs;
    418   int                       numSectorsPerLog;
    419   int                       regionParityRange;
    420   int                       logsInUse;           /* debugging */
    421   RF_ParityLogDiskQueue_t   parityLogDiskQueue;  /* state of parity logging disk work */
    422   RF_RegionBufferQueue_t    regionBufferPool;    /* buffers for holding region log */
    423   RF_RegionBufferQueue_t    parityBufferPool;    /* buffers for holding parity */
    424   caddr_t                   parityLogBufferHeap; /* pool of unused parity logs -- NOTE(review): same text as the parityLogPool comment; presumably the backing memory, confirm */
    425 #ifndef SIMULATE
    426   RF_Thread_t               pLogDiskThreadHandle;
    427 #endif /* !SIMULATE */
    428 
    429 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    430 }; /* member layout varies with KERNEL, _KERNEL, SIMULATE and RF_INCLUDE_PARITYLOGGING */
    431 
    432 #endif /* !_RF__RF_RAID_H_ */
    433