rf_disks.c revision 1.5 1 /* $NetBSD: rf_disks.c,v 1.5 1999/02/05 00:06:09 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /***************************************************************
30 * rf_disks.c -- code to perform operations on the actual disks
31 ***************************************************************/
32
33 #include "rf_types.h"
34 #include "rf_raid.h"
35 #include "rf_alloclist.h"
36 #include "rf_utils.h"
37 #include "rf_configure.h"
38 #include "rf_general.h"
39 #if !defined(__NetBSD__)
40 #include "rf_camlayer.h"
41 #endif
42 #include "rf_options.h"
43 #include "rf_sys.h"
44
45 #include <sys/types.h>
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/ioctl.h>
50 #include <sys/fcntl.h>
51 #include <sys/vnode.h>
52
53 int raidlookup __P((char *, struct proc * p, struct vnode **));
54
55
/*
 * Debug printf helpers, active only when rf_diskDebug is non-zero.
 * The number in the name is the total argument count (format string
 * included).  The body is wrapped in do { } while (0) so that each macro
 * expands to exactly one statement: a bare "if (...) printf(...)" would
 * silently capture an "else" that follows the macro invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
58
/****************************************************************************************
 *
 * initialize the disks comprising the array
 *
 * We want the spare disks to have regular row,col numbers so that we can easily
 * substitute a spare for a failed disk.  But the driver code assumes throughout
 * that the array contains numRow by numCol _non-spare_ disks, so it's not clear
 * how to fit in the spares.  This is an unfortunate holdover from raidSim.  The
 * quick and dirty fix is to make row zero bigger than the rest, and put all the
 * spares in it.  This probably needs to get changed eventually.
 *
 ***************************************************************************************/
71 int
72 rf_ConfigureDisks(
73 RF_ShutdownList_t ** listp,
74 RF_Raid_t * raidPtr,
75 RF_Config_t * cfgPtr)
76 {
77 RF_RaidDisk_t **disks;
78 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
79 RF_RowCol_t r, c;
80 int bs, ret;
81 unsigned i, count, foundone = 0, numFailuresThisRow;
82 RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
83 int num_rows_done, num_cols_done;
84
85 struct proc *proc = 0;
86 #ifndef __NetBSD__
87 ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
88 if (ret)
89 goto fail;
90 ret = rf_SCSI_AllocTUR(&tur_op);
91 if (ret)
92 goto fail;
93 #endif /* !__NetBSD__ */
94
95 num_rows_done = 0;
96 num_cols_done = 0;
97
98
99 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList);
100 if (disks == NULL) {
101 ret = ENOMEM;
102 goto fail;
103 }
104 raidPtr->Disks = disks;
105
106
107 proc = raidPtr->proc; /* Blah XXX */
108
109 /* get space for the device-specific stuff... */
110 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
111 sizeof(struct raidcinfo *), (struct raidcinfo **),
112 raidPtr->cleanupList);
113 if (raidPtr->raid_cinfo == NULL) {
114 ret = ENOMEM;
115 goto fail;
116 }
117 for (r = 0; r < raidPtr->numRow; r++) {
118 numFailuresThisRow = 0;
119 RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList);
120 if (disks[r] == NULL) {
121 ret = ENOMEM;
122 goto fail;
123 }
124 /* get more space for device specific stuff.. */
125 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
126 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
127 sizeof(struct raidcinfo), (struct raidcinfo *),
128 raidPtr->cleanupList);
129 if (raidPtr->raid_cinfo[r] == NULL) {
130 ret = ENOMEM;
131 goto fail;
132 }
133 for (c = 0; c < raidPtr->numCol; c++) {
134 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->devnames[r][c][0],
135 &disks[r][c], rdcap_op, tur_op,
136 cfgPtr->devs[r][c], r, c);
137 if (ret)
138 goto fail;
139 if (disks[r][c].status != rf_ds_optimal) {
140 numFailuresThisRow++;
141 } else {
142 if (disks[r][c].numBlocks < min_numblks)
143 min_numblks = disks[r][c].numBlocks;
144 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
145 r, c, disks[r][c].devname,
146 (long int) disks[r][c].numBlocks,
147 disks[r][c].blockSize,
148 (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024);
149 }
150 num_cols_done++;
151 }
152 /* XXX fix for n-fault tolerant */
153 if (numFailuresThisRow > 0)
154 raidPtr->status[r] = rf_rs_degraded;
155 num_rows_done++;
156 }
157 #if defined(__NetBSD__) && defined(_KERNEL)
158 /* we do nothing */
159 #else
160 rf_SCSI_FreeDiskOp(rdcap_op, 1);
161 rdcap_op = NULL;
162 rf_SCSI_FreeDiskOp(tur_op, 0);
163 tur_op = NULL;
164 #endif
165 /* all disks must be the same size & have the same block size, bs must
166 * be a power of 2 */
167 bs = 0;
168 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
169 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
170 if (disks[r][c].status == rf_ds_optimal) {
171 bs = disks[r][c].blockSize;
172 foundone = 1;
173 }
174 }
175 }
176 if (!foundone) {
177 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
178 ret = EINVAL;
179 goto fail;
180 }
181 for (count = 0, i = 1; i; i <<= 1)
182 if (bs & i)
183 count++;
184 if (count != 1) {
185 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
186 ret = EINVAL;
187 goto fail;
188 }
189 for (r = 0; r < raidPtr->numRow; r++) {
190 for (c = 0; c < raidPtr->numCol; c++) {
191 if (disks[r][c].status == rf_ds_optimal) {
192 if (disks[r][c].blockSize != bs) {
193 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
194 ret = EINVAL;
195 goto fail;
196 }
197 if (disks[r][c].numBlocks != min_numblks) {
198 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
199 r, c, (int) min_numblks);
200 disks[r][c].numBlocks = min_numblks;
201 }
202 }
203 }
204 }
205
206 raidPtr->sectorsPerDisk = min_numblks;
207 raidPtr->logBytesPerSector = ffs(bs) - 1;
208 raidPtr->bytesPerSector = bs;
209 raidPtr->sectorMask = bs - 1;
210 return (0);
211
212 fail:
213
214 #if defined(__NetBSD__) && defined(_KERNEL)
215
216 for (r = 0; r < raidPtr->numRow; r++) {
217 for (c = 0; c < raidPtr->numCol; c++) {
218 /* Cleanup.. */
219 #ifdef DEBUG
220 printf("Cleaning up row: %d col: %d\n", r, c);
221 #endif
222 if (raidPtr->raid_cinfo[r][c].ci_vp) {
223 (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
224 FREAD | FWRITE, proc->p_ucred, proc);
225 }
226 }
227 }
228 /* Space allocated for raid_vpp will get cleaned up at some other
229 * point */
230 /* XXX Need more #ifdefs in the above... */
231
232 #else
233
234 if (rdcap_op)
235 rf_SCSI_FreeDiskOp(rdcap_op, 1);
236 if (tur_op)
237 rf_SCSI_FreeDiskOp(tur_op, 0);
238
239 #endif
240 return (ret);
241 }
242
243
/****************************************************************************************
 * Set up the data structures describing the spare disks in the array.
 * Recall from the comment on rf_ConfigureDisks() that the spare disk descriptors
 * are stored in row zero, which is specially expanded to hold them.
 ***************************************************************************************/
249 int
250 rf_ConfigureSpareDisks(
251 RF_ShutdownList_t ** listp,
252 RF_Raid_t * raidPtr,
253 RF_Config_t * cfgPtr)
254 {
255 char buf[256];
256 int r, c, i, ret;
257 RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
258 unsigned bs;
259 RF_RaidDisk_t *disks;
260 int num_spares_done;
261
262 struct proc *proc;
263
264 #ifndef __NetBSD__
265 ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
266 if (ret)
267 goto fail;
268 ret = rf_SCSI_AllocTUR(&tur_op);
269 if (ret)
270 goto fail;
271 #endif /* !__NetBSD__ */
272
273 num_spares_done = 0;
274
275 proc = raidPtr->proc;
276 /* The space for the spares should have already been allocated by
277 * ConfigureDisks() */
278
279 disks = &raidPtr->Disks[0][raidPtr->numCol];
280 for (i = 0; i < raidPtr->numSpare; i++) {
281 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
282 &disks[i], rdcap_op, tur_op,
283 cfgPtr->spare_devs[i], 0, raidPtr->numCol + i);
284 if (ret)
285 goto fail;
286 if (disks[i].status != rf_ds_optimal) {
287 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", buf);
288 } else {
289 disks[i].status = rf_ds_spare; /* change status to
290 * spare */
291 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
292 disks[i].devname,
293 (long int) disks[i].numBlocks, disks[i].blockSize,
294 (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024);
295 }
296 num_spares_done++;
297 }
298 #if defined(__NetBSD__) && (_KERNEL)
299
300 #else
301 rf_SCSI_FreeDiskOp(rdcap_op, 1);
302 rdcap_op = NULL;
303 rf_SCSI_FreeDiskOp(tur_op, 0);
304 tur_op = NULL;
305 #endif
306
307 /* check sizes and block sizes on spare disks */
308 bs = 1 << raidPtr->logBytesPerSector;
309 for (i = 0; i < raidPtr->numSpare; i++) {
310 if (disks[i].blockSize != bs) {
311 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
312 ret = EINVAL;
313 goto fail;
314 }
315 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
316 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
317 disks[i].devname, disks[i].blockSize, (long int) raidPtr->sectorsPerDisk);
318 ret = EINVAL;
319 goto fail;
320 } else
321 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
322 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
323
324 disks[i].numBlocks = raidPtr->sectorsPerDisk;
325 }
326 }
327
328 return (0);
329
330 fail:
331 #if defined(__NetBSD__) && defined(_KERNEL)
332
333 /* Release the hold on the main components. We've failed to allocate
334 * a spare, and since we're failing, we need to free things.. */
335
336 for (r = 0; r < raidPtr->numRow; r++) {
337 for (c = 0; c < raidPtr->numCol; c++) {
338 /* Cleanup.. */
339 #ifdef DEBUG
340 printf("Cleaning up row: %d col: %d\n", r, c);
341 #endif
342 if (raidPtr->raid_cinfo[r][c].ci_vp) {
343 (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
344 FREAD | FWRITE, proc->p_ucred, proc);
345 }
346 }
347 }
348
349 for (i = 0; i < raidPtr->numSpare; i++) {
350 /* Cleanup.. */
351 #ifdef DEBUG
352 printf("Cleaning up spare: %d\n", i);
353 #endif
354 if (raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp) {
355 (void) vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp,
356 FREAD | FWRITE, proc->p_ucred, proc);
357 }
358 }
359
360 #else
361
362 if (rdcap_op)
363 rf_SCSI_FreeDiskOp(rdcap_op, 1);
364 if (tur_op)
365 rf_SCSI_FreeDiskOp(tur_op, 0);
366
367 #endif
368
369 return (ret);
370 }
371
372
373
374 /* configure a single disk in the array */
375 int
376 rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col)
377 RF_Raid_t *raidPtr; /* We need this down here too!! GO */
378 char *buf;
379 RF_RaidDisk_t *diskPtr;
380 RF_DiskOp_t *rdcap_op;
381 RF_DiskOp_t *tur_op;
382 dev_t dev; /* device number used only in kernel */
383 RF_RowCol_t row;
384 RF_RowCol_t col;
385 {
386 char *p;
387 int retcode;
388
389 struct partinfo dpart;
390 struct vnode *vp;
391 struct vattr va;
392 struct proc *proc;
393 int error;
394
395 retcode = 0;
396 p = rf_find_non_white(buf);
397 if (p[strlen(p) - 1] == '\n') {
398 /* strip off the newline */
399 p[strlen(p) - 1] = '\0';
400 }
401 (void) strcpy(diskPtr->devname, p);
402
403 #ifndef __NetBSD__
404 /* get bus, target, lun */
405 retcode = rf_extract_ids(p, &busid, &targid, &lun);
406 if (retcode)
407 return (retcode);
408
409 /* required in kernel, nop at user level */
410 retcode = rf_SCSI_OpenUnit(dev);
411 if (retcode)
412 return (retcode);
413
414 diskPtr->dev = dev;
415 if (rf_SCSI_DoTUR(tur_op, (u_char) busid, (u_char) targid, (u_char) lun, dev)) {
416 RF_ERRORMSG1("Disk %s failed TUR. Marked as dead.\n", diskPtr->devname);
417 diskPtr->status = rf_ds_failed;
418 } else {
419 diskPtr->status = rf_ds_optimal;
420 retcode = rf_SCSI_DoReadCapacity(raidPtr, rdcap_op, busid, targid, lun, dev,
421 &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname);
422 if (retcode)
423 return (retcode);
424
425 /* we allow the user to specify that only a fraction of the
426 * disks should be used this is just for debug: it speeds up
427 * the parity scan */
428 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
429 }
430 #endif
431
432 proc = raidPtr->proc; /* XXX Yes, this is not nice.. */
433
434 /* Let's start by claiming the component is fine and well... */
435 /* XXX not the case if the disk is toast.. */
436 diskPtr->status = rf_ds_optimal;
437
438
439 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
440 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
441
442 error = raidlookup(diskPtr->devname, proc, &vp);
443 if (error) {
444 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
445 if (error == ENXIO) {
446 /* XXX the component isn't there... must be dead :-( */
447 diskPtr->status = rf_ds_failed;
448 } else {
449 return (error);
450 }
451 }
452 if (diskPtr->status == rf_ds_optimal) {
453
454 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
455 return (error);
456 }
457 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
458 FREAD, proc->p_ucred, proc);
459 if (error) {
460 return (error);
461 }
462 diskPtr->blockSize = dpart.disklab->d_secsize;
463
464 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
465
466 raidPtr->raid_cinfo[row][col].ci_vp = vp;
467 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
468
469 #if 0
470 diskPtr->dev = dev;
471 #endif
472
473 diskPtr->dev = va.va_rdev; /* XXX or the above? */
474
475 /* we allow the user to specify that only a fraction of the
476 * disks should be used this is just for debug: it speeds up
477 * the parity scan */
478 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
479
480 }
481 return (0);
482 }
483