rf_layout.h revision 1.1 1 /* $NetBSD: rf_layout.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* rf_layout.h -- header file defining layout data structures
30 */
31
32 /*
33 * :
34 * Log: rf_layout.h,v
35 * Revision 1.50 1996/11/05 21:10:40 jimz
36 * failed pda generalization
37 *
38 * Revision 1.49 1996/07/29 14:05:12 jimz
39 * fix numPUs/numRUs confusion (everything is now numRUs)
40 * clean up some commenting, return values
41 *
42 * Revision 1.48 1996/07/22 19:52:16 jimz
43 * switched node params to RF_DagParam_t, a union of
44 * a 64-bit int and a void *, for better portability
45 * attempted hpux port, but failed partway through for
46 * lack of a single C compiler capable of compiling all
47 * source files
48 *
49 * Revision 1.47 1996/07/18 22:57:14 jimz
50 * port simulator to AIX
51 *
52 * Revision 1.46 1996/07/13 00:00:59 jimz
53 * sanitized generalized reconstruction architecture
54 * cleaned up head sep, rbuf problems
55 *
56 * Revision 1.45 1996/07/11 19:08:00 jimz
57 * generalize reconstruction mechanism
58 * allow raid1 reconstructs via copyback (done with array
59 * quiesced, not online, therefore not disk-directed)
60 *
61 * Revision 1.44 1996/06/19 22:23:01 jimz
62 * parity verification is now a layout-configurable thing
63 * not all layouts currently support it (correctly, anyway)
64 *
65 * Revision 1.43 1996/06/19 17:53:48 jimz
66 * move GetNumSparePUs, InstallSpareTable ops into layout switch
67 *
68 * Revision 1.42 1996/06/19 14:56:48 jimz
69 * move layout-specific config parsing hooks into RF_LayoutSW_t
70 * table in rf_layout.c
71 *
72 * Revision 1.41 1996/06/10 11:55:47 jimz
73 * Straightened out some per-array/not-per-array distinctions, fixed
74 * a couple bugs related to confusion. Added shutdown lists. Removed
75 * layout shutdown function (now subsumed by shutdown lists).
76 *
77 * Revision 1.40 1996/06/07 22:26:27 jimz
78 * type-ify which_ru (RF_ReconUnitNum_t)
79 *
80 * Revision 1.39 1996/06/07 21:33:04 jimz
81 * begin using consistent types for sector numbers,
82 * stripe numbers, row+col numbers, recon unit numbers
83 *
84 * Revision 1.38 1996/06/03 23:28:26 jimz
85 * more bugfixes
86 * check in tree to sync for IPDS runs with current bugfixes
87 * there still may be a problem with threads in the script test
88 * getting I/Os stuck- not trivially reproducible (runs ~50 times
89 * in a row without getting stuck)
90 *
91 * Revision 1.37 1996/05/31 22:26:54 jimz
92 * fix a lot of mapping problems, memory allocation problems
93 * found some weird lock issues, fixed 'em
94 * more code cleanup
95 *
96 * Revision 1.36 1996/05/30 11:29:41 jimz
97 * Numerous bug fixes. Stripe lock release code disagreed with the taking code
98 * about when stripes should be locked (I made it consistent: no parity, no lock)
99 * There was a lot of extra serialization of I/Os which I've removed- a lot of
100 * it was to calculate values for the cache code, which is no longer with us.
101 * More types, function, macro cleanup. Added code to properly quiesce the array
102 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
103 * before. Fixed memory allocation, freeing bugs.
104 *
105 * Revision 1.35 1996/05/27 18:56:37 jimz
106 * more code cleanup
107 * better typing
108 * compiles in all 3 environments
109 *
110 * Revision 1.34 1996/05/24 22:17:04 jimz
111 * continue code + namespace cleanup
112 * typed a bunch of flags
113 *
114 * Revision 1.33 1996/05/24 04:28:55 jimz
115 * release cleanup ckpt
116 *
117 * Revision 1.32 1996/05/24 01:59:45 jimz
118 * another checkpoint in code cleanup for release
119 * time to sync kernel tree
120 *
121 * Revision 1.31 1996/05/23 21:46:35 jimz
122 * checkpoint in code cleanup (release prep)
123 * lots of types, function names have been fixed
124 *
125 * Revision 1.30 1996/05/18 19:51:34 jimz
126 * major code cleanup- fix syntax, make some types consistent,
127 * add prototypes, clean out dead code, et cetera
128 *
129 * Revision 1.29 1995/12/01 19:16:19 root
130 * added copyright info
131 *
132 * Revision 1.28 1995/11/28 21:26:49 amiri
133 * defined a declustering flag RF_BD_DECLUSTERED
134 *
135 * Revision 1.27 1995/11/17 19:00:59 wvcii
136 * created MapQ entry in switch table
137 * added prototyping to MapParity
138 *
139 * Revision 1.26 1995/11/07 15:40:27 wvcii
140 * changed prototype of SeclectionFunc in mapsw
141 * function no longer returns numHdrSucc, numTermAnt
142 *
143 * Revision 1.25 1995/10/12 20:57:08 arw
144 * added lots of comments
145 *
146 * Revision 1.24 1995/10/12 16:04:08 jimz
147 * added config name to mapsw
148 *
149 * Revision 1.23 1995/07/26 03:28:31 robby
150 * intermediary checkin
151 *
152 * Revision 1.22 1995/07/10 20:51:08 robby
153 * added to the asm info for the virtual striping locks
154 *
155 * Revision 1.21 1995/07/10 16:57:47 robby
156 * updated alloclistelem struct to the correct struct name
157 *
158 * Revision 1.20 1995/07/08 20:06:11 rachad
159 * *** empty log message ***
160 *
161 * Revision 1.19 1995/07/08 18:05:39 rachad
162 * Linked up Claudsons code with the real cache
163 *
164 * Revision 1.18 1995/07/06 14:29:36 robby
165 * added defaults states list to the layout switch
166 *
167 * Revision 1.17 1995/06/23 13:40:14 robby
168 * updeated to prototypes in rf_layout.h
169 *
170 * Revision 1.16 1995/06/08 22:11:03 holland
171 * bug fixes related to mutiple-row arrays
172 *
173 * Revision 1.15 1995/05/24 21:43:23 wvcii
174 * added field numParityLogCol to RaidLayout
175 *
176 * Revision 1.14 95/05/02 22:46:53 holland
177 * minor code cleanups.
178 *
179 * Revision 1.13 1995/05/02 12:48:01 holland
180 * eliminated some unused code.
181 *
182 * Revision 1.12 1995/05/01 13:28:00 holland
183 * parity range locks, locking disk requests, recon+parityscan in kernel, etc.
184 *
185 * Revision 1.11 1995/03/15 20:01:17 holland
186 * added REMAP and DONT_REMAP
187 *
188 * Revision 1.10 1995/03/09 19:54:11 rachad
189 * Added suport for threadless simulator
190 *
191 * Revision 1.9 1995/03/03 21:48:58 holland
192 * minor changes.
193 *
194 * Revision 1.8 1995/03/01 20:25:48 holland
195 * kernelization changes
196 *
197 * Revision 1.7 1995/02/03 22:31:36 holland
198 * many changes related to kernelization
199 *
200 * Revision 1.6 1995/01/30 14:53:46 holland
201 * extensive changes related to making DoIO non-blocking
202 *
203 * Revision 1.5 1995/01/24 23:58:46 holland
204 * multi-way recon XOR, plus various small changes
205 *
206 * Revision 1.4 1995/01/04 19:28:35 holland
207 * corrected comments around mapsw
208 *
209 * Revision 1.3 1994/11/28 22:15:45 danner
210 * Added type field to the physdiskaddr struct.
211 *
212 */
213
214 #ifndef _RF__RF_LAYOUT_H_
215 #define _RF__RF_LAYOUT_H_
216
217 #include "rf_types.h"
218 #include "rf_archs.h"
219 #include "rf_alloclist.h"
220
221 /*****************************************************************************************
222 *
223 * This structure identifies all layout-specific operations and parameters.
224 *
225 ****************************************************************************************/
226
227 typedef struct RF_LayoutSW_s {
228 RF_ParityConfig_t parityConfig;
229 char *configName;
230
231 #ifndef KERNEL
232 /* layout-specific parsing */
233 int (*MakeLayoutSpecific)(FILE *fp, RF_Config_t *cfgPtr, void *arg);
234 void *makeLayoutSpecificArg;
235 #endif /* !KERNEL */
236
237 #if RF_UTILITY == 0
238 /* initialization routine */
239 int (*Configure)(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
240
241 /* routine to map RAID sector address -> physical (row, col, offset) */
242 void (*MapSector)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
243 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
244
245 /* routine to map RAID sector address -> physical (r,c,o) of parity unit */
246 void (*MapParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
247 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
248
249 /* routine to map RAID sector address -> physical (r,c,o) of Q unit */
250 void (*MapQ)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t *row,
251 RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
252
253 /* routine to identify the disks comprising a stripe */
254 void (*IdentifyStripe)(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
255 RF_RowCol_t **diskids, RF_RowCol_t *outRow);
256
257 /* routine to select a dag */
258 void (*SelectionFunc)(RF_Raid_t *raidPtr, RF_IoType_t type,
259 RF_AccessStripeMap_t *asmap,
260 RF_VoidFuncPtr *);
261 #if 0
262 void (**createFunc)(RF_Raid_t *,
263 RF_AccessStripeMap_t *,
264 RF_DagHeader_t *, void *,
265 RF_RaidAccessFlags_t,
266 RF_AllocListElem_t *));
267
268 #endif
269
270 /* map a stripe ID to a parity stripe ID. This is typically the identity mapping */
271 void (*MapSIDToPSID)(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
272 RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru);
273
274 /* get default head separation limit (may be NULL) */
275 RF_HeadSepLimit_t (*GetDefaultHeadSepLimit)(RF_Raid_t *raidPtr);
276
277 /* get default num recon buffers (may be NULL) */
278 int (*GetDefaultNumFloatingReconBuffers)(RF_Raid_t *raidPtr);
279
280 /* get number of spare recon units (may be NULL) */
281 RF_ReconUnitCount_t (*GetNumSpareRUs)(RF_Raid_t *raidPtr);
282
283 /* spare table installation (may be NULL) */
284 int (*InstallSpareTable)(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
285
286 /* recon buffer submission function */
287 int (*SubmitReconBuffer)(RF_ReconBuffer_t *rbuf, int keep_it,
288 int use_committed);
289
290 /*
291 * verify that parity information for a stripe is correct
292 * see rf_parityscan.h for return vals
293 */
294 int (*VerifyParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
295 RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
296
297 /* number of faults tolerated by this mapping */
298 int faultsTolerated;
299
300 /* states to step through in an access. Must end with "LastState".
301 * The default is DefaultStates in rf_layout.c */
302 RF_AccessState_t *states;
303
304 RF_AccessStripeMapFlags_t flags;
305 #endif /* RF_UTILITY == 0 */
306 } RF_LayoutSW_t;
307
/*
 * RF_REMAP enables remapping to the spare location under distributed
 * sparing; RF_DONT_REMAP leaves the address unremapped.
 * NOTE(review): presumably these are the values passed as the "remap"
 * argument of the layout mapping routines -- confirm against callers.
 */
#define RF_REMAP       1
#define RF_DONT_REMAP  0
311
/*
 * Flag values for RF_AccessStripeMapFlags_t.
 * NOTE(review): these bit values overlap numerically with the RF_ASM_*
 * flags defined later in this header, and both RF_LayoutSW_t.flags and
 * RF_AccessStripeMap_s.flags use this type -- confirm which field each
 * set of flags is meant for.
 */

/* suppress stripe locks */
#define RF_NO_STRIPE_LOCKS   0x0001

/* distribute spare space in archs that support it */
#define RF_DISTRIBUTE_SPARE  0x0002

/* declustering uses block designs */
#define RF_BD_DECLUSTERED    0x0004
318
319 /*************************************************************************
320 *
321 * this structure forms the layout component of the main Raid
322 * structure. It describes everything needed to define and perform
323 * the mapping of logical RAID addresses <-> physical disk addresses.
324 *
325 *************************************************************************/
326 struct RF_RaidLayout_s {
327 /* configuration parameters */
328 RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one stripe unit */
329 RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */
330 RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction unit */
331
332 /* redundant-but-useful info computed from the above, used in all layouts */
333 RF_StripeCount_t numStripe; /* total number of stripes in the array */
334 RF_SectorCount_t dataSectorsPerStripe;
335 RF_StripeCount_t dataStripeUnitsPerDisk;
336 u_int bytesPerStripeUnit;
337 u_int dataBytesPerStripe;
338 RF_StripeCount_t numDataCol; /* number of SUs of data per stripe (name here is a la RAID4) */
339 RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. Always 1 for now */
340 RF_StripeCount_t numParityLogCol; /* number of SUs of parity log per stripe. Always 1 for now */
341 RF_StripeCount_t stripeUnitsPerDisk;
342
343 RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and information */
344 void *layoutSpecificInfo; /* ptr to a structure holding layout-specific params */
345 };
346
347 /*****************************************************************************************
348 *
349 * The mapping code returns a pointer to a list of AccessStripeMap structures, which
350 * describes all the mapping information about an access. The list contains one
351 * AccessStripeMap structure per stripe touched by the access. Each element in the list
352 * contains a stripe identifier and a pointer to a list of PhysDiskAddr structures. Each
353 * element in this latter list describes the physical location of a stripe unit accessed
354 * within the corresponding stripe.
355 *
356 ****************************************************************************************/
357
/*
 * Values for the "type" member of RF_PhysDiskAddr_s:
 * data, parity, or Q.
 */
#define RF_PDA_TYPE_DATA    0
#define RF_PDA_TYPE_PARITY  1
#define RF_PDA_TYPE_Q       2
361
362 struct RF_PhysDiskAddr_s {
363 RF_RowCol_t row,col; /* disk identifier */
364 RF_SectorNum_t startSector; /* sector offset into the disk */
365 RF_SectorCount_t numSector; /* number of sectors accessed */
366 int type; /* used by higher levels: currently, data, parity, or q */
367 caddr_t bufPtr; /* pointer to buffer supplying/receiving data */
368 RF_RaidAddr_t raidAddress; /* raid address corresponding to this physical disk address */
369 RF_PhysDiskAddr_t *next;
370 };
371
372 #define RF_MAX_FAILED_PDA RF_MAXCOL
373
374 struct RF_AccessStripeMap_s {
375 RF_StripeNum_t stripeID; /* the stripe index */
376 RF_RaidAddr_t raidAddress; /* the starting raid address within this stripe */
377 RF_RaidAddr_t endRaidAddress; /* raid address one sector past the end of the access */
378 RF_SectorCount_t totalSectorsAccessed; /* total num sectors identified in physInfo list */
379 RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in physInfo list */
380 int numDataFailed; /* number of failed data disks accessed */
381 int numParityFailed; /* number of failed parity disks accessed (0 or 1) */
382 int numQFailed; /* number of failed Q units accessed (0 or 1) */
383 RF_AccessStripeMapFlags_t flags; /* various flags */
384 #if 0
385 RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */
386 RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA that has failed, if any */
387 #else
388 int numFailedPDAs; /* number of failed phys addrs */
389 RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys addrs */
390 #endif
391 RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */
392 RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the parity (P of P + Q ) */
393 RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of P + Q */
394 RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */
395 RF_RowCol_t origRow; /* the original row: we may redirect the acc to a different row */
396 RF_AccessStripeMap_t *next;
397 };
398
/*
 * RF_ASM_* flag values (one bit each).
 */

/* allows large-write creation code to redirect failed accs */
#define RF_ASM_REDIR_LARGE_WRITE    0x00000001

/* allows us to detect recursive calls to the bailout write dag */
#define RF_ASM_BAILOUT_DAG_USED     0x00000002

/* we've acquired the lock on the first parity range in this parity stripe */
#define RF_ASM_FLAGS_LOCK_TRIED     0x00000004

/* we've acquired the lock on the 2nd parity range in this parity stripe */
#define RF_ASM_FLAGS_LOCK_TRIED2    0x00000008

/* we've done the force-recon call on this parity stripe */
#define RF_ASM_FLAGS_FORCE_TRIED    0x00000010

/* we blocked recon => we must unblock it later */
#define RF_ASM_FLAGS_RECON_BLOCKED  0x00000020
406
407 struct RF_AccessStripeMapHeader_s {
408 RF_StripeCount_t numStripes; /* total number of stripes touched by this acc */
409 RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. Also used for making lists */
410 RF_AccessStripeMapHeader_t *next;
411 };
412
413 /*****************************************************************************************
414 *
415 * various routines mapping addresses in the RAID address space. These work across
416 * all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE.
417 *
418 ****************************************************************************************/
419
/*
 * Conversions between RAID addresses, stripe-unit IDs and stripe IDs.
 * "lp" is a pointer to the layout; all arithmetic is integer arithmetic.
 * Note that "lp" is expanded more than once in some of these macros.
 */

/* stripe ID of the stripe containing RAID address "ra" */
#define rf_RaidAddressToStripeID(lp, ra) \
	((((ra) / (lp)->sectorsPerStripeUnit)) / (lp)->numDataCol)

/* RAID address of the start of the indicated stripe ID "sid" */
#define rf_StripeIDToRaidAddress(lp, sid) \
	((((sid) * (lp)->sectorsPerStripeUnit)) * (lp)->numDataCol)

/* stripe ID of the stripe containing stripe unit ID "suid" */
#define rf_StripeUnitIDToStripeID(lp, suid) \
	((suid) / (lp)->numDataCol)

/* stripe unit ID of the stripe unit containing RAID address "ra" */
#define rf_RaidAddressToStripeUnitID(lp, ra) \
	((ra) / (lp)->sectorsPerStripeUnit)
435
/*
 * Stripe and stripe-unit boundary computations.
 * "lp" is a pointer to the layout, "ra" a RAID address.  The "next"
 * variants return the boundary strictly beyond "ra"; the "prev" variants
 * return the start of the stripe (or stripe unit) that contains "ra".
 */

/* RAID address of the next stripe boundary beyond "ra" */
#define rf_RaidAddressOfNextStripeBoundary(lp, ra) \
	((((ra) / (lp)->dataSectorsPerStripe) + 1) * (lp)->dataSectorsPerStripe)

/* RAID address of the start of the stripe containing "ra" */
#define rf_RaidAddressOfPrevStripeBoundary(lp, ra) \
	(((ra) / (lp)->dataSectorsPerStripe) * (lp)->dataSectorsPerStripe)

/* RAID address of the next stripe unit boundary beyond "ra" */
#define rf_RaidAddressOfNextStripeUnitBoundary(lp, ra) \
	((((ra) / (lp)->sectorsPerStripeUnit) + 1L) * (lp)->sectorsPerStripeUnit)

/* RAID address of the start of the stripe unit containing "ra" */
#define rf_RaidAddressOfPrevStripeUnitBoundary(lp, ra) \
	(((ra) / (lp)->sectorsPerStripeUnit) * (lp)->sectorsPerStripeUnit)
451
/*
 * Offsets within a stripe / stripe unit, and the alignment predicates
 * built on top of them.  "lp" is a pointer to the layout, "ra" an address.
 */

/* offset of "ra" into its stripe; used by rf_RaidAddressStripeAligned */
#define rf_RaidAddressStripeOffset(lp, ra) \
	((ra) % ((lp)->dataSectorsPerStripe))

/* offset of "ra" into its stripe unit */
#define rf_StripeUnitOffset(lp, ra) \
	((ra) % ((lp)->sectorsPerStripeUnit))

/* nonzero if the given RAID address is stripe-aligned */
#define rf_RaidAddressStripeAligned(lp, ra) \
	(rf_RaidAddressStripeOffset(lp, ra) == 0)

/* nonzero if the given address is stripe-unit aligned */
#define rf_StripeUnitAligned(lp, ra) \
	(rf_StripeUnitOffset(lp, ra) == 0)
467
/*
 * Convert between an address expressed in RAID blocks and one expressed in
 * bytes by shifting by the array's logBytesPerSector.  "rp" is a pointer
 * to a structure that has a logBytesPerSector member.
 */

/* RAID block address -> byte address */
#define rf_RaidAddressToByte(rp, ra) \
	((ra) << ((rp)->logBytesPerSector))

/* byte address -> RAID block address */
#define rf_ByteToRaidAddress(rp, ba) \
	((ba) >> ((rp)->logBytesPerSector))
474
/*
 * Convert a raid address to/from a parity stripe ID.
 *
 * Going from a parity stripe ID to a raid address is a plain product,
 * because the result is the address of the first sector in the parity
 * stripe.  Going the other way is more complex, since stripes are not
 * contiguously allocated in parity stripes; that direction is delegated to
 * rf_MapStripeIDToParityStripeID(), with "ru" passed through as its last
 * argument.
 */
#define rf_RaidAddressToParityStripeID(lp, ra, ru) \
	rf_MapStripeIDToParityStripeID((lp), \
	    rf_RaidAddressToStripeID((lp), (ra)), (ru))

#define rf_ParityStripeIDToRaidAddress(lp, psid) \
	((psid) * (lp)->SUsPerPU * (lp)->numDataCol * (lp)->sectorsPerStripeUnit)
485
486 RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig);
487 int rf_ConfigureLayout(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
488 RF_Config_t *cfgPtr);
489 RF_StripeNum_t rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *layoutPtr,
490 RF_StripeNum_t stripeID, RF_ReconUnitNum_t *which_ru);
491
492 #endif /* !_RF__RF_LAYOUT_H_ */
493