rf_reconstruct.h revision 1.1 1 1.1 oster /* $NetBSD: rf_reconstruct.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*********************************************************
30 1.1 oster * rf_reconstruct.h -- header file for reconstruction code
31 1.1 oster *********************************************************/
32 1.1 oster
33 1.1 oster /* :
34 1.1 oster * Log: rf_reconstruct.h,v
35 1.1 oster * Revision 1.25 1996/08/01 15:57:24 jimz
36 1.1 oster * minor cleanup
37 1.1 oster *
38 1.1 oster * Revision 1.24 1996/07/22 19:52:16 jimz
39 1.1 oster * switched node params to RF_DagParam_t, a union of
40 1.1 oster * a 64-bit int and a void *, for better portability
41 1.1 oster * attempted hpux port, but failed partway through for
42 1.1 oster * lack of a single C compiler capable of compiling all
43 1.1 oster * source files
44 1.1 oster *
45 1.1 oster * Revision 1.23 1996/07/15 05:40:41 jimz
46 1.1 oster * some recon datastructure cleanup
47 1.1 oster * better handling of multiple failures
48 1.1 oster * added undocumented double-recon test
49 1.1 oster *
50 1.1 oster * Revision 1.22 1996/07/13 00:00:59 jimz
51 1.1 oster * sanitized generalized reconstruction architecture
52 1.1 oster * cleaned up head sep, rbuf problems
53 1.1 oster *
54 1.1 oster * Revision 1.21 1996/07/11 19:08:00 jimz
55 1.1 oster * generalize reconstruction mechanism
56 1.1 oster * allow raid1 reconstructs via copyback (done with array
57 1.1 oster * quiesced, not online, therefore not disk-directed)
58 1.1 oster *
59 1.1 oster * Revision 1.20 1996/06/11 10:57:30 jimz
60 1.1 oster * add rf_RegisterReconDoneProc
61 1.1 oster *
62 1.1 oster * Revision 1.19 1996/06/10 11:55:47 jimz
63 1.1 oster * Straightened out some per-array/not-per-array distinctions, fixed
64 1.1 oster * a couple bugs related to confusion. Added shutdown lists. Removed
65 1.1 oster * layout shutdown function (now subsumed by shutdown lists).
66 1.1 oster *
67 1.1 oster * Revision 1.18 1996/06/07 21:33:04 jimz
68 1.1 oster * begin using consistent types for sector numbers,
69 1.1 oster * stripe numbers, row+col numbers, recon unit numbers
70 1.1 oster *
71 1.1 oster * Revision 1.17 1996/06/05 18:06:02 jimz
72 1.1 oster * Major code cleanup. The Great Renaming is now done.
73 1.1 oster * Better modularity. Better typing. Fixed a bunch of
74 1.1 oster * synchronization bugs. Made a lot of global stuff
75 1.1 oster * per-desc or per-array. Removed dead code.
76 1.1 oster *
77 1.1 oster * Revision 1.16 1996/06/03 23:28:26 jimz
78 1.1 oster * more bugfixes
79 1.1 oster * check in tree to sync for IPDS runs with current bugfixes
80 1.1 oster * there still may be a problem with threads in the script test
81 1.1 oster * getting I/Os stuck- not trivially reproducible (runs ~50 times
82 1.1 oster * in a row without getting stuck)
83 1.1 oster *
84 1.1 oster * Revision 1.15 1996/06/02 17:31:48 jimz
85 1.1 oster * Moved a lot of global stuff into array structure, where it belongs.
86 1.1 oster * Fixed up paritylogging, pss modules in this manner. Some general
87 1.1 oster * code cleanup. Removed lots of dead code, some dead files.
88 1.1 oster *
89 1.1 oster * Revision 1.14 1996/05/31 22:26:54 jimz
90 1.1 oster * fix a lot of mapping problems, memory allocation problems
91 1.1 oster * found some weird lock issues, fixed 'em
92 1.1 oster * more code cleanup
93 1.1 oster *
94 1.1 oster * Revision 1.13 1996/05/30 11:29:41 jimz
95 1.1 oster * Numerous bug fixes. Stripe lock release code disagreed with the taking code
96 1.1 oster * about when stripes should be locked (I made it consistent: no parity, no lock)
97 1.1 oster * There was a lot of extra serialization of I/Os which I've removed- a lot of
98 1.1 oster * it was to calculate values for the cache code, which is no longer with us.
99 1.1 oster * More types, function, macro cleanup. Added code to properly quiesce the array
100 1.1 oster * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
101 1.1 oster * before. Fixed memory allocation, freeing bugs.
102 1.1 oster *
103 1.1 oster * Revision 1.12 1996/05/27 18:56:37 jimz
104 1.1 oster * more code cleanup
105 1.1 oster * better typing
106 1.1 oster * compiles in all 3 environments
107 1.1 oster *
108 1.1 oster * Revision 1.11 1996/05/23 21:46:35 jimz
109 1.1 oster * checkpoint in code cleanup (release prep)
110 1.1 oster * lots of types, function names have been fixed
111 1.1 oster *
112 1.1 oster * Revision 1.10 1996/05/18 19:51:34 jimz
113 1.1 oster * major code cleanup- fix syntax, make some types consistent,
114 1.1 oster * add prototypes, clean out dead code, et cetera
115 1.1 oster *
116 1.1 oster * Revision 1.9 1995/12/06 15:04:55 root
117 1.1 oster * added copyright info
118 1.1 oster *
119 1.1 oster */
120 1.1 oster
121 1.1 oster #ifndef _RF__RF_RECONSTRUCT_H_
122 1.1 oster #define _RF__RF_RECONSTRUCT_H_
123 1.1 oster
124 1.1 oster #include "rf_types.h"
125 1.1 oster #include <sys/time.h>
126 1.1 oster #include "rf_reconmap.h"
127 1.1 oster #include "rf_psstatus.h"
128 1.1 oster
129 1.1 oster /* reconstruction configuration information */
130 1.1 oster struct RF_ReconConfig_s {
131 1.1 oster unsigned numFloatingReconBufs; /* number of floating recon bufs to use */
132 1.1 oster RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow to become, in parity stripes */
133 1.1 oster };
134 1.1 oster
135 1.1 oster /* a reconstruction buffer */
136 1.1 oster struct RF_ReconBuffer_s {
137 1.1 oster RF_Raid_t *raidPtr; /* void * to avoid recursive includes */
138 1.1 oster caddr_t buffer; /* points to the data */
139 1.1 oster RF_StripeNum_t parityStripeID; /* the parity stripe that this data relates to */
140 1.1 oster int which_ru; /* which reconstruction unit within the PSS */
141 1.1 oster RF_SectorNum_t failedDiskSectorOffset;/* the offset into the failed disk */
142 1.1 oster RF_RowCol_t row, col; /* which disk this buffer belongs to or is targeted at */
143 1.1 oster RF_StripeCount_t count; /* counts the # of SUs installed so far */
144 1.1 oster int priority; /* used to force hi priority recon */
145 1.1 oster RF_RbufType_t type; /* FORCED or FLOATING */
146 1.1 oster char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't arrived */
147 1.1 oster RF_ReconBuffer_t *next; /* used for buffer management */
148 1.1 oster void *arg; /* generic field for general use */
149 1.1 oster RF_RowCol_t spRow, spCol; /* spare disk to which this buf should be written */
150 1.1 oster /* if dist sparing off, always identifies the replacement disk */
151 1.1 oster RF_SectorNum_t spOffset; /* offset into the spare disk */
152 1.1 oster /* if dist sparing off, identical to failedDiskSectorOffset */
153 1.1 oster RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with issue-pending write */
154 1.1 oster };
155 1.1 oster
/*
 * A reconstruction event descriptor.  The event types currently are:
 *    RF_REVENT_READDONE       -- a read operation has completed
 *    RF_REVENT_WRITEDONE      -- a write operation has completed
 *    RF_REVENT_BUFREADY       -- the buffer manager has produced a full buffer
 *    RF_REVENT_BLOCKCLEAR     -- a reconstruction blockage has been cleared
 *    RF_REVENT_BUFCLEAR       -- the buffer manager has released a process
 *                                blocked on submission
 *    RF_REVENT_HEADSEPCLEAR   -- NOTE(review): previously undocumented;
 *                                presumably a head-separation blockage has
 *                                been cleared -- confirm against the
 *                                reconstruction code
 *    RF_REVENT_SKIP           -- we need to skip the current RU and go on to
 *                                the next one, typ. b/c we found recon forced
 *    RF_REVENT_FORCEDREADDONE -- a forced-reconstruction read operation has
 *                                completed
 *
 * Enumerators carry no explicit values, so they number consecutively from 0
 * in the order listed; keep the order stable if anything depends on it.
 */
typedef enum RF_Revent_e {
	RF_REVENT_READDONE,
	RF_REVENT_WRITEDONE,
	RF_REVENT_BUFREADY,
	RF_REVENT_BLOCKCLEAR,
	RF_REVENT_BUFCLEAR,
	RF_REVENT_HEADSEPCLEAR,
	RF_REVENT_SKIP,
	RF_REVENT_FORCEDREADDONE
} RF_Revent_t;
175 1.1 oster
176 1.1 oster struct RF_ReconEvent_s {
177 1.1 oster RF_Revent_t type; /* what kind of event has occurred */
178 1.1 oster RF_RowCol_t col; /* row ID is implicit in the queue in which the event is placed */
179 1.1 oster void *arg; /* a generic argument */
180 1.1 oster RF_ReconEvent_t *next;
181 1.1 oster };
182 1.1 oster
183 1.1 oster /*
184 1.1 oster * Reconstruction control information maintained per-disk
185 1.1 oster * (for surviving disks)
186 1.1 oster */
187 1.1 oster struct RF_PerDiskReconCtrl_s {
188 1.1 oster RF_ReconCtrl_t *reconCtrl;
189 1.1 oster RF_RowCol_t row, col; /* to make this structure self-identifying */
190 1.1 oster RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this disk */
191 1.1 oster RF_HeadSepLimit_t headSepCounter; /* counter used to control maximum head separation */
192 1.1 oster RF_SectorNum_t diskOffset; /* the offset into the indicated disk of the current PU */
193 1.1 oster RF_ReconUnitNum_t ru_count; /* this counts off the recon units within each parity unit */
194 1.1 oster RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */
195 1.1 oster };
196 1.1 oster
197 1.1 oster /* main reconstruction control structure */
198 1.1 oster struct RF_ReconCtrl_s {
199 1.1 oster RF_RaidReconDesc_t *reconDesc;
200 1.1 oster RF_RowCol_t fcol; /* which column has failed */
201 1.1 oster RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained per-disk */
202 1.1 oster RF_ReconMap_t *reconMap; /* map of what has/has not been reconstructed */
203 1.1 oster RF_RowCol_t spareRow; /* which of the spare disks we're using */
204 1.1 oster RF_RowCol_t spareCol;
205 1.1 oster RF_StripeNum_t lastPSID; /* the ID of the last parity stripe we want reconstructed */
206 1.1 oster int percentComplete; /* percentage completion of reconstruction */
207 1.1 oster
208 1.1 oster /* reconstruction event queue */
209 1.1 oster RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction events */
210 1.1 oster RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event queue */
211 1.1 oster RF_DECLARE_COND(eq_cond) /* condition variable for signalling recon events */
212 1.1 oster int eq_count; /* debug only */
213 1.1 oster
214 1.1 oster /* reconstruction buffer management */
215 1.1 oster RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around with recon buffers */
216 1.1 oster RF_ReconBuffer_t *floatingRbufs; /* available floating reconstruction buffers */
217 1.1 oster RF_ReconBuffer_t *committedRbufs; /* recon buffers that have been committed to some waiting disk */
218 1.1 oster RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be written out */
219 1.1 oster RF_ReconBuffer_t *priorityList; /* full buffers that have been elevated to higher priority */
220 1.1 oster RF_CallbackDesc_t *bufferWaitList; /* disks that are currently blocked waiting for buffers */
221 1.1 oster
222 1.1 oster /* parity stripe status table */
223 1.1 oster RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of active parity stripes */
224 1.1 oster
225 1.1 oster /* maximum-head separation control */
226 1.1 oster RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over all disks */
227 1.1 oster RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be done as minPSID advances */
228 1.1 oster
229 1.1 oster /* performance monitoring */
230 1.1 oster struct timeval starttime; /* recon start time */
231 1.1 oster
232 1.1 oster void (*continueFunc)(void *); /* function to call when io returns*/
233 1.1 oster void *continueArg; /* argument for Func */
234 1.1 oster };
235 1.1 oster
/* the default priority for reconstruction accesses */
#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY
238 1.1 oster
239 1.1 oster int rf_ConfigureReconstruction(RF_ShutdownList_t **listp);
240 1.1 oster
241 1.1 oster int rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row,
242 1.1 oster RF_RowCol_t col);
243 1.1 oster
244 1.1 oster int rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row,
245 1.1 oster RF_RowCol_t col);
246 1.1 oster
247 1.1 oster int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc);
248 1.1 oster
249 1.1 oster int rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
250 1.1 oster void (*cbFunc)(RF_Raid_t *,void *), void *cbArg);
251 1.1 oster
252 1.1 oster int rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
253 1.1 oster
254 1.1 oster int rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc)(RF_Raid_t *, void *), void *arg,
255 1.1 oster RF_ReconDoneProc_t **handlep);
256 1.1 oster
257 1.1 oster #endif /* !_RF__RF_RECONSTRUCT_H_ */
258