rf_raid.h revision 1.3 1 /* $NetBSD: rf_raid.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /**********************************************
30 * rf_raid.h -- main header file for RAID driver
31 **********************************************/
32
33
34 #ifndef _RF__RF_RAID_H_
35 #define _RF__RF_RAID_H_
36
37 #include "rf_archs.h"
38 #include "rf_types.h"
39 #include "rf_threadstuff.h"
40
41 #include "rf_netbsd.h"
42
43 #include <sys/disklabel.h>
44 #include <sys/types.h>
45
46 #include "rf_alloclist.h"
47 #include "rf_stripelocks.h"
48 #include "rf_layout.h"
49 #include "rf_disks.h"
50 #include "rf_debugMem.h"
51 #include "rf_diskqueue.h"
52 #include "rf_reconstruct.h"
53 #include "rf_acctrace.h"
54
55 #if RF_INCLUDE_PARITYLOGGING > 0
56 #include "rf_paritylog.h"
57 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
58
/* Compile-time limit on the number of disks in a single array. */
59 #define RF_MAX_DISKS 128 /* max disks per array */
/*
 * Expands to DISKUNIT(_dev).  DISKUNIT is not defined in this file
 * (presumably it comes from <sys/disklabel.h>, included above, and extracts
 * the unit number from a dev_t -- TODO confirm), so the result is presumably
 * the index of the RAID device that _dev refers to.
 */
60 #define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev))
61
62 /*
63 * Each row in the array is a distinct parity group, so
64 * each has its own status, which is one of the following.
65 */
/*
 * RF_RowStatus_t -- status of one row of the array.  struct RF_Raid_s in this
 * file keeps a `status` pointer documented as holding one entry per row.
 * No enumerator has an explicit initializer, so the values are 0..3 in
 * declaration order; inserting or reordering entries changes those values.
 * The per-value meanings below are inferred from the names only.
 */
66 typedef enum RF_RowStatus_e {
67 rf_rs_optimal, /* value 0; presumably: no failed component in the row -- confirm */
68 rf_rs_degraded, /* value 1; presumably: row is running with a failed component -- confirm */
69 rf_rs_reconstructing, /* value 2; presumably: reconstruction is in progress for the row -- confirm */
70 rf_rs_reconfigured /* value 3; NOTE(review): exact meaning not visible here -- confirm */
71 } RF_RowStatus_t;
72
/*
 * Running totals of I/O activity between a start and a stop timestamp.
 * struct RF_Raid_s in this file has a `userstats` member of type
 * RF_CumulativeStats_t; that typedef is not visible here and presumably
 * names this struct (TODO confirm in rf_types.h).
 * All three counters are plain `long`, and this struct declares no lock of
 * its own.
 */
73 struct RF_CumulativeStats_s {
74 struct timeval start; /* the time when the stats were last started */
75 struct timeval stop; /* the time when the stats were last stopped */
76 long sum_io_us; /* sum of all user response times (us) */
77 long num_ios; /* total number of I/Os serviced */
78 long num_sect_moved; /* total number of sectors read or written */
79 };
80
/*
 * Throughput accounting driven by the count of outstanding requests: per the
 * member comments, the timer runs while at least one request is outstanding.
 * struct RF_Raid_s in this file embeds a member of type RF_ThroughputStats_t
 * (presumably a typedef of this struct -- confirm) only when neither _KERNEL
 * nor SIMULATE is defined.
 *
 * NOTE(review): the comment on `mutex` mentions "configuration stuff", which
 * looks copied from elsewhere; since the mutex is declared inside this
 * struct it presumably guards the counters below -- confirm against users.
 * RF_DECLARE_MUTEX is defined outside this file and is used here without a
 * trailing semicolon.
 */
81 struct RF_ThroughputStats_s {
82 RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration
83 * stuff */
84 struct timeval start; /* timer started when numOutstandingRequests
85 * moves from 0 to 1 */
86 struct timeval stop; /* timer stopped when numOutstandingRequests
87 * moves from 1 to 0 */
88 RF_uint64 sum_io_us; /* total time timer is enabled */
89 RF_uint64 num_ios; /* total number of ios processed by RAIDframe */
90 long num_out_ios; /* number of outstanding ios */
91 };
92
/*
 * struct RF_Raid_s -- the per-array descriptor of the RAID driver.
 *
 * Layout of the declaration, as documented by the comments inside it:
 *   - numRow .. Queues: described as fixed once set up and readable without
 *     locking (Disks[][].status is the stated exception);
 *   - everything after `mutex`: described as mutable and needing locking;
 *     some later groups declare their own mutex/condition members, and
 *     nAccOutstanding/waitShutdown are documented as protected by a
 *     different lock (the "desc freelist lock").
 *
 * RF_DECLARE_MUTEX and RF_DECLARE_COND are macros defined outside this file
 * (presumably in rf_threadstuff.h -- confirm); they are used here without a
 * trailing semicolon.  Member order and the two conditional sections
 * (throughputstats, parity logging) determine the struct layout, so every
 * translation unit must see the same macro settings.
 */
93 struct RF_Raid_s {
94 /* This portion never changes, and can be accessed without locking */
95 /* an exception is Disks[][].status, which requires locking when it is
96 * changed */
97 u_int numRow; /* number of rows of disks, typically == # of
98 * ranks */
99 u_int numCol; /* number of columns of disks, typically == #
100 * of disks/rank */
101 u_int numSpare; /* number of spare disks */
102 int maxQueueDepth; /* max disk queue depth */
103 RF_SectorCount_t totalSectors; /* total number of sectors in the
104 * array */
105 RF_SectorCount_t sectorsPerDisk; /* number of sectors on each
106 * disk */
107 u_int logBytesPerSector; /* base-2 log of the number of bytes
108 * in a sector */
109 u_int bytesPerSector; /* bytes in a sector */
110 RF_int32 sectorMask; /* mask of bytes-per-sector */
111
112 RF_RaidLayout_t Layout; /* all information related to layout */
113 RF_RaidDisk_t **Disks; /* all information related to physical disks */
114 RF_DiskQueue_t **Queues;/* all information related to disk queues */
115 /* NOTE: This is an anchor point via which the queues can be
116 * accessed, but the enqueue/dequeue routines in diskqueue.c use a
117 * local copy of this pointer for the actual accesses. */
118 /* The remainder of the structure can change, and therefore requires
119 * locking on reads and updates */
120 RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to
121 * the fields below */
122 RF_RowStatus_t *status; /* the status of each row in the array */
123 int valid; /* indicates successful configuration */
124 RF_LockTableEntry_t *lockTable; /* stripe-lock table */
125 RF_LockTableEntry_t *quiesceLock; /* quiescence table */
126 int numFailures; /* total number of failures in the array */
127
128 /*
129 * Cleanup stuff
130 */
131 RF_ShutdownList_t *shutdownList; /* shutdown activities */
132 RF_AllocListElem_t *cleanupList; /* memory to be freed at
133 * shutdown time */
134
135 /*
136 * Recon stuff
137 */
138 RF_HeadSepLimit_t headSepLimit; /* NOTE(review): presumably a head-separation limit used during reconstruction -- confirm */
139 int numFloatingReconBufs; /* NOTE(review): presumably a count of floating reconstruction buffers -- confirm */
140 int reconInProgress; /* NOTE(review): presumably nonzero while a reconstruction is running -- confirm */
141 RF_DECLARE_COND(waitForReconCond) /* condition declared via macro; its waiters/signalers are not visible here */
142 RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */
143 RF_ReconCtrl_t **reconControl; /* reconstruction control structure
144 * pointers for each row in the array */
145
146 /*
147 * Array-quiescence stuff
148 */
149 RF_DECLARE_MUTEX(access_suspend_mutex) /* presumably guards the quiescence members below -- confirm */
150 RF_DECLARE_COND(quiescent_cond)
151 RF_IoCount_t accesses_suspended; /* NOTE(review): counter vs. flag semantics not visible here -- confirm */
152 RF_IoCount_t accs_in_flight; /* presumably the number of accesses currently in flight -- confirm */
153 int access_suspend_release;
154 int waiting_for_quiescence;
155 RF_CallbackDesc_t *quiesce_wait_list; /* presumably callbacks waiting for the array to quiesce -- confirm */
156
157 /*
158 * Statistics
159 */
160 #if !defined(_KERNEL) && !defined(SIMULATE)
161 RF_ThroughputStats_t throughputstats; /* present only when neither _KERNEL nor SIMULATE is defined */
162 #endif /* !_KERNEL && !SIMULATE */
163 RF_CumulativeStats_t userstats; /* always present, unlike throughputstats */
164
165 /*
166 * Engine thread control
167 */
168 RF_DECLARE_MUTEX(node_queue_mutex) /* presumably guards node_queue -- confirm */
169 RF_DECLARE_COND(node_queue_cond)
170 RF_DagNode_t *node_queue; /* presumably the queue of DAG nodes awaiting the engine thread -- confirm */
171 RF_Thread_t engine_thread;
172 RF_ThreadGroup_t engine_tg;
173 int shutdown_engine; /* presumably a request flag for the engine thread to exit -- confirm */
174 int dags_in_flight; /* debug */
175
176 /*
177 * PSS (Parity Stripe Status) stuff
178 */
179 RF_FreeList_t *pss_freelist;
180 long pssTableSize;
181
182 /*
183 * Reconstruction stuff
184 */
185 int procsInBufWait;
186 int numFullReconBuffers;
187 RF_AccTraceEntry_t *recon_tracerecs;
188 unsigned long accumXorTimeUs; /* presumably accumulated XOR time in microseconds, per the "Us" suffix -- confirm */
189 RF_ReconDoneProc_t *recon_done_procs;
190 RF_DECLARE_MUTEX(recon_done_proc_mutex) /* presumably guards recon_done_procs -- confirm */
191 /*
192 * nAccOutstanding, waitShutdown protected by desc freelist lock
193 * (This may seem strange, since that's a central serialization point
194 * for a per-array piece of data, but otherwise, it'd be an extra
195 * per-array lock, and that'd only be less efficient...)
196 */
197 RF_DECLARE_COND(outstandingCond)
198 int waitShutdown;
199 int nAccOutstanding;
200
201 RF_DiskId_t **diskids;
202 RF_DiskId_t *sparediskids;
203
204 int raidid; /* presumably the unit number of this array (cf. RF_DEV2RAIDID) -- confirm */
205 RF_AccTotals_t acc_totals;
206 int keep_acc_totals; /* presumably enables accumulation into acc_totals -- confirm */
207
208 struct raidcinfo **raid_cinfo; /* array of component info */
209 struct proc *proc; /* XXX shouldn't be needed here.. :-p */
210
211 int terminate_disk_queues;
212
213 /*
214 * XXX
215 *
216 * config-specific information should be moved
217 * somewhere else, or at least hung off this
218 * in some generic way
219 */
220
221 /* used by rf_compute_workload_shift */
222 RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; /* fixed-size table embedded in the struct; bounds are defined outside this file */
223
224 /* used by declustering */
225 int noRotate;
226
227 #if RF_INCLUDE_PARITYLOGGING > 0
228 /* used by parity logging */
229 RF_SectorCount_t regionLogCapacity;
230 RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */
231 RF_RegionInfo_t *regionInfo; /* array of region state */
232 int numParityLogs;
233 int numSectorsPerLog;
234 int regionParityRange;
235 int logsInUse; /* debugging */
236 RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity
237 * logging disk work */
238 RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding
239 * region log */
240 RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding
241 * parity */
242 caddr_t parityLogBufferHeap; /* pool of unused parity logs
   * NOTE(review): same wording as parityLogPool above; presumably this is
   * the backing memory for the log buffers -- confirm */
243 RF_Thread_t pLogDiskThreadHandle;
244
245 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
246 };
247 #endif /* !_RF__RF_RAID_H_ */
248