rf_disks.c revision 1.4 1 /* $NetBSD: rf_disks.c,v 1.4 1999/01/26 03:49:49 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /***************************************************************
30 * rf_disks.c -- code to perform operations on the actual disks
31 ***************************************************************/
32
33 #include "rf_types.h"
34 #include "rf_raid.h"
35 #include "rf_alloclist.h"
36 #include "rf_utils.h"
37 #include "rf_configure.h"
38 #include "rf_general.h"
39 #if !defined(__NetBSD__)
40 #include "rf_camlayer.h"
41 #endif
42 #include "rf_options.h"
43 #include "rf_sys.h"
44
45 #include <sys/types.h>
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/ioctl.h>
50 #include <sys/fcntl.h>
51 #include <sys/vnode.h>
52
/*
 * Declared here, defined outside this file.  rf_ConfigureDisk() calls it
 * with a component name and a process pointer; it returns an errno-style
 * code and hands back a vnode through the last argument.
 */
int raidlookup __P((char *, struct proc *p, struct vnode **));


/*
 * Debug printf wrappers, active only when rf_diskDebug is non-zero.
 * The digit is the total number of arguments (format string included).
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement: a bare "if (...) printf(...)" expansion would capture
 * a following "else" at the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
58
59 /****************************************************************************************
60 *
61 * initialize the disks comprising the array
62 *
63 * We want the spare disks to have regular row,col numbers so that we can easily
64 * substitute a spare for a failed disk. But, the driver code assumes throughout
65 * that the array contains numRow by numCol _non-spare_ disks, so it's not clear
66 * how to fit in the spares. This is an unfortunate holdover from raidSim. The
67 * quick and dirty fix is to make row zero bigger than the rest, and put all the
68 * spares in it. This probably needs to get changed eventually.
69 *
70 ***************************************************************************************/
71 int rf_ConfigureDisks(
72 RF_ShutdownList_t **listp,
73 RF_Raid_t *raidPtr,
74 RF_Config_t *cfgPtr)
75 {
76 RF_RaidDisk_t **disks;
77 RF_SectorCount_t min_numblks = (RF_SectorCount_t)0x7FFFFFFFFFFFLL;
78 RF_RowCol_t r, c;
79 int bs, ret;
80 unsigned i, count, foundone=0, numFailuresThisRow;
81 RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
82 int num_rows_done,num_cols_done;
83
84 struct proc *proc = 0;
85 #ifndef __NetBSD__
86 ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
87 if (ret)
88 goto fail;
89 ret = rf_SCSI_AllocTUR(&tur_op);
90 if (ret)
91 goto fail;
92 #endif /* !__NetBSD__ */
93
94 num_rows_done = 0;
95 num_cols_done = 0;
96
97
98 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList);
99 if (disks == NULL) {
100 ret = ENOMEM;
101 goto fail;
102 }
103 raidPtr->Disks = disks;
104
105
106 proc = raidPtr->proc; /* Blah XXX */
107
108 /* get space for the device-specific stuff... */
109 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
110 sizeof(struct raidcinfo *), (struct raidcinfo **),
111 raidPtr->cleanupList);
112 if (raidPtr->raid_cinfo == NULL) {
113 ret = ENOMEM;
114 goto fail;
115 }
116
117 for (r=0; r<raidPtr->numRow; r++) {
118 numFailuresThisRow = 0;
119 RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList);
120 if (disks[r] == NULL) {
121 ret = ENOMEM;
122 goto fail;
123 }
124
125 /* get more space for device specific stuff.. */
126 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
127 raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0),
128 sizeof(struct raidcinfo), (struct raidcinfo *),
129 raidPtr->cleanupList);
130 if (raidPtr->raid_cinfo[r] == NULL) {
131 ret = ENOMEM;
132 goto fail;
133 }
134
135
136 for (c=0; c<raidPtr->numCol; c++) {
137 ret = rf_ConfigureDisk(raidPtr,&cfgPtr->devnames[r][c][0],
138 &disks[r][c], rdcap_op, tur_op,
139 cfgPtr->devs[r][c],r,c);
140 if (ret)
141 goto fail;
142 if (disks[r][c].status != rf_ds_optimal) {
143 numFailuresThisRow++;
144 }
145 else {
146 if (disks[r][c].numBlocks < min_numblks)
147 min_numblks = disks[r][c].numBlocks;
148 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
149 r,c,disks[r][c].devname,
150 (long int) disks[r][c].numBlocks,
151 disks[r][c].blockSize,
152 (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024);
153 }
154 num_cols_done++;
155 }
156 /* XXX fix for n-fault tolerant */
157 if (numFailuresThisRow > 0)
158 raidPtr->status[r] = rf_rs_degraded;
159 num_rows_done++;
160 }
161 #if defined(__NetBSD__) && defined(_KERNEL)
162 /* we do nothing */
163 #else
164 rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
165 rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL;
166 #endif
167 /* all disks must be the same size & have the same block size, bs must be a power of 2 */
168 bs = 0;
169 for (foundone=r=0; !foundone && r<raidPtr->numRow; r++) {
170 for (c=0; !foundone && c<raidPtr->numCol; c++) {
171 if (disks[r][c].status == rf_ds_optimal) {
172 bs = disks[r][c].blockSize;
173 foundone = 1;
174 }
175 }
176 }
177 if (!foundone) {
178 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
179 ret = EINVAL;
180 goto fail;
181 }
182 for (count=0,i=1; i; i<<=1) if (bs & i)
183 count++;
184 if (count != 1) {
185 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n",bs);
186 ret = EINVAL;
187 goto fail;
188 }
189 for (r=0; r<raidPtr->numRow; r++) {
190 for (c=0; c<raidPtr->numCol; c++) {
191 if (disks[r][c].status == rf_ds_optimal) {
192 if (disks[r][c].blockSize != bs) {
193 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n",r,c);
194 ret = EINVAL;
195 goto fail;
196 }
197 if (disks[r][c].numBlocks != min_numblks) {
198 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
199 r,c,(int) min_numblks);
200 disks[r][c].numBlocks = min_numblks;
201 }
202 }
203 }
204 }
205
206 raidPtr->sectorsPerDisk = min_numblks;
207 raidPtr->logBytesPerSector = ffs(bs) - 1;
208 raidPtr->bytesPerSector = bs;
209 raidPtr->sectorMask = bs-1;
210 return(0);
211
212 fail:
213
214 #if defined(__NetBSD__) && defined(_KERNEL)
215
216 for(r=0;r<raidPtr->numRow;r++) {
217 for(c=0;c<raidPtr->numCol;c++) {
218 /* Cleanup.. */
219 #ifdef DEBUG
220 printf("Cleaning up row: %d col: %d\n",r,c);
221 #endif
222 if (raidPtr->raid_cinfo[r][c].ci_vp) {
223 (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
224 FREAD|FWRITE, proc->p_ucred, proc);
225 }
226 }
227 }
228 /* Space allocated for raid_vpp will get cleaned up at some other point */
229 /* XXX Need more #ifdefs in the above... */
230
231 #else
232
233 if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
234 if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0);
235
236 #endif
237 return(ret);
238 }
239
240
241 /****************************************************************************************
242 * set up the data structures describing the spare disks in the array
243 * recall from the above comment that the spare disk descriptors are stored
244 * in row zero, which is specially expanded to hold them.
245 ***************************************************************************************/
246 int rf_ConfigureSpareDisks(
247 RF_ShutdownList_t **listp,
248 RF_Raid_t *raidPtr,
249 RF_Config_t *cfgPtr)
250 {
251 char buf[256];
252 int r,c,i, ret;
253 RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
254 unsigned bs;
255 RF_RaidDisk_t *disks;
256 int num_spares_done;
257
258 struct proc *proc;
259
260 #ifndef __NetBSD__
261 ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
262 if (ret)
263 goto fail;
264 ret = rf_SCSI_AllocTUR(&tur_op);
265 if (ret)
266 goto fail;
267 #endif /* !__NetBSD__ */
268
269 num_spares_done = 0;
270
271 proc = raidPtr->proc;
272 /* The space for the spares should have already been
273 allocated by ConfigureDisks() */
274
275 disks = &raidPtr->Disks[0][raidPtr->numCol];
276 for (i=0; i<raidPtr->numSpare; i++) {
277 ret = rf_ConfigureDisk(raidPtr,&cfgPtr->spare_names[i][0],
278 &disks[i], rdcap_op, tur_op,
279 cfgPtr->spare_devs[i],0,raidPtr->numCol+i);
280 if (ret)
281 goto fail;
282 if (disks[i].status != rf_ds_optimal) {
283 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",buf);
284 } else {
285 disks[i].status = rf_ds_spare; /* change status to spare */
286 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",i,
287 disks[i].devname,
288 (long int) disks[i].numBlocks,disks[i].blockSize,
289 (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024);
290 }
291 num_spares_done++;
292 }
293 #if defined(__NetBSD__) && (_KERNEL)
294
295 #else
296 rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
297 rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL;
298 #endif
299
300 /* check sizes and block sizes on spare disks */
301 bs = 1 << raidPtr->logBytesPerSector;
302 for (i=0; i<raidPtr->numSpare; i++) {
303 if (disks[i].blockSize != bs) {
304 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n",disks[i].blockSize, disks[i].devname, bs);
305 ret = EINVAL;
306 goto fail;
307 }
308 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
309 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
310 disks[i].devname, disks[i].blockSize, (long int)raidPtr->sectorsPerDisk);
311 ret = EINVAL;
312 goto fail;
313 } else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
314 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n",disks[i].devname, (long int) raidPtr->sectorsPerDisk);
315
316 disks[i].numBlocks = raidPtr->sectorsPerDisk;
317 }
318 }
319
320 return(0);
321
322 fail:
323 #if defined(__NetBSD__) && defined(_KERNEL)
324
325 /* Release the hold on the main components. We've failed to allocate a
326 spare, and since we're failing, we need to free things.. */
327
328 for(r=0;r<raidPtr->numRow;r++) {
329 for(c=0;c<raidPtr->numCol;c++) {
330 /* Cleanup.. */
331 #ifdef DEBUG
332 printf("Cleaning up row: %d col: %d\n",r,c);
333 #endif
334 if (raidPtr->raid_cinfo[r][c].ci_vp) {
335 (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
336 FREAD|FWRITE, proc->p_ucred, proc);
337 }
338 }
339 }
340
341 for(i=0;i<raidPtr->numSpare;i++) {
342 /* Cleanup.. */
343 #ifdef DEBUG
344 printf("Cleaning up spare: %d\n",i);
345 #endif
346 if (raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp) {
347 (void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp,
348 FREAD|FWRITE, proc->p_ucred, proc);
349 }
350 }
351
352 #else
353
354 if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
355 if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0);
356
357 #endif
358
359 return(ret);
360 }
361
362
363
364 /* configure a single disk in the array */
365 int rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col)
366 RF_Raid_t *raidPtr; /* We need this down here too!! GO */
367 char *buf;
368 RF_RaidDisk_t *diskPtr;
369 RF_DiskOp_t *rdcap_op;
370 RF_DiskOp_t *tur_op;
371 dev_t dev; /* device number used only in kernel */
372 RF_RowCol_t row;
373 RF_RowCol_t col;
374 {
375 char *p;
376 int retcode;
377
378 struct partinfo dpart;
379 struct vnode *vp;
380 struct vattr va;
381 struct proc *proc;
382 int error;
383
384 retcode = 0;
385 p = rf_find_non_white(buf);
386 if (p[strlen(p)-1] == '\n') {
387 /* strip off the newline */
388 p[strlen(p)-1] = '\0';
389 }
390 (void) strcpy(diskPtr->devname, p);
391
392 #ifndef __NetBSD__
393 /* get bus, target, lun */
394 retcode = rf_extract_ids(p, &busid, &targid, &lun);
395 if (retcode)
396 return(retcode);
397
398 /* required in kernel, nop at user level */
399 retcode = rf_SCSI_OpenUnit(dev);
400 if (retcode)
401 return(retcode);
402
403 diskPtr->dev = dev;
404 if (rf_SCSI_DoTUR(tur_op, (u_char)busid, (u_char)targid, (u_char)lun, dev)) {
405 RF_ERRORMSG1("Disk %s failed TUR. Marked as dead.\n",diskPtr->devname);
406 diskPtr->status = rf_ds_failed;
407 } else {
408 diskPtr->status = rf_ds_optimal;
409 retcode = rf_SCSI_DoReadCapacity(raidPtr,rdcap_op, busid, targid, lun, dev,
410 &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname);
411 if (retcode)
412 return(retcode);
413
414 /* we allow the user to specify that only a fraction of the disks should be used
415 * this is just for debug: it speeds up the parity scan
416 */
417 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
418 }
419 #endif
420
421 proc = raidPtr->proc; /* XXX Yes, this is not nice.. */
422
423 /* Let's start by claiming the component is fine and well... */
424 /* XXX not the case if the disk is toast.. */
425 diskPtr->status = rf_ds_optimal;
426
427
428 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
429 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
430
431 error = raidlookup(diskPtr->devname, proc, &vp);
432 if (error) {
433 printf("raidlookup on device: %s failed!\n",diskPtr->devname);
434 if (error == ENXIO) {
435 /* XXX the component isn't there... must be dead :-( */
436 diskPtr->status = rf_ds_failed;
437 } else {
438 return(error);
439 }
440 }
441
442 if (diskPtr->status == rf_ds_optimal) {
443
444 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
445 return(error);
446 }
447
448 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
449 FREAD, proc->p_ucred, proc);
450 if (error) {
451 return(error);
452 }
453
454
455 diskPtr->blockSize = dpart.disklab->d_secsize;
456
457 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
458
459 raidPtr->raid_cinfo[row][col].ci_vp = vp;
460 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
461
462 #if 0
463 diskPtr->dev = dev;
464 #endif
465
466 diskPtr->dev = va.va_rdev; /* XXX or the above? */
467
468 /* we allow the user to specify that only a fraction of the disks should be used
469 * this is just for debug: it speeds up the parity scan
470 */
471 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
472
473 }
474
475 return(0);
476 }
477
478