rf_disks.c revision 1.6 1 /* $NetBSD: rf_disks.c,v 1.6 1999/02/24 00:00:03 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_sys.h"
77
78 #include <sys/types.h>
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/ioctl.h>
83 #include <sys/fcntl.h>
84 #include <sys/vnode.h>
85
86 /* XXX these should be in a header file somewhere */
87 int raidlookup __P((char *, struct proc * p, struct vnode **));
88 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
89 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
90 void rf_UnconfigureVnodes( RF_Raid_t * );
91 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
92
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * DPRINTF6 takes a format plus five arguments, DPRINTF7 a format plus six.
 *
 * The bodies are wrapped in do { } while (0) so that each use behaves as a
 * single statement; in particular an "else" following a use cannot bind to
 * the "if" hidden inside the macro.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
95
96 /**************************************************************************
97 *
98 * initialize the disks comprising the array
99 *
100 * We want the spare disks to have regular row,col numbers so that we can
101 * easily substitue a spare for a failed disk. But, the driver code assumes
102 * throughout that the array contains numRow by numCol _non-spare_ disks, so
103 * it's not clear how to fit in the spares. This is an unfortunate holdover
104 * from raidSim. The quick and dirty fix is to make row zero bigger than the
105 * rest, and put all the spares in it. This probably needs to get changed
106 * eventually.
107 *
108 **************************************************************************/
109
110 int
111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
112 RF_ShutdownList_t **listp;
113 RF_Raid_t *raidPtr;
114 RF_Config_t *cfgPtr;
115 {
116 RF_RaidDisk_t **disks;
117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 RF_RowCol_t r, c;
119 int bs, ret;
120 unsigned i, count, foundone = 0, numFailuresThisRow;
121 int num_rows_done, num_cols_done;
122
123 num_rows_done = 0;
124 num_cols_done = 0;
125
126 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
127 (RF_RaidDisk_t **), raidPtr->cleanupList);
128 if (disks == NULL) {
129 ret = ENOMEM;
130 goto fail;
131 }
132 raidPtr->Disks = disks;
133
134 /* get space for the device-specific stuff... */
135 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
136 sizeof(struct raidcinfo *), (struct raidcinfo **),
137 raidPtr->cleanupList);
138 if (raidPtr->raid_cinfo == NULL) {
139 ret = ENOMEM;
140 goto fail;
141 }
142 for (r = 0; r < raidPtr->numRow; r++) {
143 numFailuresThisRow = 0;
144 /* We allocate RF_MAXSPARE on the first row so that we
145 have room to do hot-swapping of spares */
146 RF_CallocAndAdd(disks[r], raidPtr->numCol
147 + ((r == 0) ? RF_MAXSPARE : 0),
148 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
149 raidPtr->cleanupList);
150 if (disks[r] == NULL) {
151 ret = ENOMEM;
152 goto fail;
153 }
154 /* get more space for device specific stuff.. */
155 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
156 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
157 sizeof(struct raidcinfo), (struct raidcinfo *),
158 raidPtr->cleanupList);
159 if (raidPtr->raid_cinfo[r] == NULL) {
160 ret = ENOMEM;
161 goto fail;
162 }
163 for (c = 0; c < raidPtr->numCol; c++) {
164 ret = rf_ConfigureDisk(raidPtr,
165 &cfgPtr->devnames[r][c][0],
166 &disks[r][c], r, c);
167 if (ret)
168 goto fail;
169 #ifdef NOT_YET_BOYS_AND_GIRLS
170 if (disks[r][c].status == rf_ds_optimal) {
171 raidread_component_label(
172 raidPtr->raid_cinfo[r][c].ci_dev,
173 raidPtr->raid_cinfo[r][c].ci_vp,
174 &raidPtr->raid_cinfo[r][c].ci_label);
175 }
176 #endif
177 if (disks[r][c].status != rf_ds_optimal) {
178 numFailuresThisRow++;
179 } else {
180 if (disks[r][c].numBlocks < min_numblks)
181 min_numblks = disks[r][c].numBlocks;
182 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
183 r, c, disks[r][c].devname,
184 (long int) disks[r][c].numBlocks,
185 disks[r][c].blockSize,
186 (long int) disks[r][c].numBlocks *
187 disks[r][c].blockSize / 1024 / 1024);
188 }
189 num_cols_done++;
190 }
191 /* XXX fix for n-fault tolerant */
192 /* XXX this should probably check to see how many failures
193 we can handle for this configuration! */
194 if (numFailuresThisRow > 0)
195 raidPtr->status[r] = rf_rs_degraded;
196 num_rows_done++;
197 }
198
199 /* all disks must be the same size & have the same block size, bs must
200 * be a power of 2 */
201 bs = 0;
202 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
203 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
204 if (disks[r][c].status == rf_ds_optimal) {
205 bs = disks[r][c].blockSize;
206 foundone = 1;
207 }
208 }
209 }
210 if (!foundone) {
211 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
212 ret = EINVAL;
213 goto fail;
214 }
215 for (count = 0, i = 1; i; i <<= 1)
216 if (bs & i)
217 count++;
218 if (count != 1) {
219 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
220 ret = EINVAL;
221 goto fail;
222 }
223
224 #if NOT_YET_BOYS_AND_GIRLS
225 if (rf_CheckLabels( raidPtr, cfgPtr )) {
226 printf("There were fatal errors (ignored for now)\n");
227 }
228 #endif
229 for (r = 0; r < raidPtr->numRow; r++) {
230 for (c = 0; c < raidPtr->numCol; c++) {
231 if (disks[r][c].status == rf_ds_optimal) {
232 if (disks[r][c].blockSize != bs) {
233 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
234 ret = EINVAL;
235 goto fail;
236 }
237 if (disks[r][c].numBlocks != min_numblks) {
238 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
239 r, c, (int) min_numblks);
240 disks[r][c].numBlocks = min_numblks;
241 }
242 }
243 }
244 }
245
246 raidPtr->sectorsPerDisk = min_numblks;
247 raidPtr->logBytesPerSector = ffs(bs) - 1;
248 raidPtr->bytesPerSector = bs;
249 raidPtr->sectorMask = bs - 1;
250 return (0);
251
252 fail:
253
254 rf_UnconfigureVnodes( raidPtr );
255
256 return (ret);
257 }
258
259
260 /****************************************************************************
261 * set up the data structures describing the spare disks in the array
262 * recall from the above comment that the spare disk descriptors are stored
263 * in row zero, which is specially expanded to hold them.
264 ****************************************************************************/
265 int
266 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
267 RF_ShutdownList_t ** listp;
268 RF_Raid_t * raidPtr;
269 RF_Config_t * cfgPtr;
270 {
271 int i, ret;
272 unsigned int bs;
273 RF_RaidDisk_t *disks;
274 int num_spares_done;
275
276 num_spares_done = 0;
277
278 /* The space for the spares should have already been allocated by
279 * ConfigureDisks() */
280
281 disks = &raidPtr->Disks[0][raidPtr->numCol];
282 for (i = 0; i < raidPtr->numSpare; i++) {
283 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
284 &disks[i], 0, raidPtr->numCol + i);
285 if (ret)
286 goto fail;
287 if (disks[i].status != rf_ds_optimal) {
288 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
289 &cfgPtr->spare_names[i][0]);
290 } else {
291 disks[i].status = rf_ds_spare; /* change status to
292 * spare */
293 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
294 disks[i].devname,
295 (long int) disks[i].numBlocks, disks[i].blockSize,
296 (long int) disks[i].numBlocks *
297 disks[i].blockSize / 1024 / 1024);
298 }
299 num_spares_done++;
300 }
301
302 /* check sizes and block sizes on spare disks */
303 bs = 1 << raidPtr->logBytesPerSector;
304 for (i = 0; i < raidPtr->numSpare; i++) {
305 if (disks[i].blockSize != bs) {
306 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
307 ret = EINVAL;
308 goto fail;
309 }
310 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
311 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
312 disks[i].devname, disks[i].blockSize,
313 (long int) raidPtr->sectorsPerDisk);
314 ret = EINVAL;
315 goto fail;
316 } else
317 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
318 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
319
320 disks[i].numBlocks = raidPtr->sectorsPerDisk;
321 }
322 }
323
324 return (0);
325
326 fail:
327
328 /* Release the hold on the main components. We've failed to allocate
329 * a spare, and since we're failing, we need to free things..
330
331 XXX failing to allocate a spare is *not* that big of a deal...
332 We *can* survive without it, if need be, esp. if we get hot
333 adding working.
334
335 If we don't fail out here, then we need a way to remove this spare...
336 that should be easier to do here than if we are "live"...
337
338 */
339
340 rf_UnconfigureVnodes( raidPtr );
341
342 return (ret);
343 }
344
345
346
347 /* configure a single disk in the array */
348 int
349 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
350 RF_Raid_t *raidPtr;
351 char *buf;
352 RF_RaidDisk_t *diskPtr;
353 RF_RowCol_t row;
354 RF_RowCol_t col;
355 {
356 char *p;
357 int retcode;
358
359 struct partinfo dpart;
360 struct vnode *vp;
361 struct vattr va;
362 struct proc *proc;
363 int error;
364
365 retcode = 0;
366 p = rf_find_non_white(buf);
367 if (p[strlen(p) - 1] == '\n') {
368 /* strip off the newline */
369 p[strlen(p) - 1] = '\0';
370 }
371 (void) strcpy(diskPtr->devname, p);
372
373 proc = raidPtr->proc; /* XXX Yes, this is not nice.. */
374
375 /* Let's start by claiming the component is fine and well... */
376 diskPtr->status = rf_ds_optimal;
377
378 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
379 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
380
381 error = raidlookup(diskPtr->devname, proc, &vp);
382 if (error) {
383 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
384 if (error == ENXIO) {
385 /* the component isn't there... must be dead :-( */
386 diskPtr->status = rf_ds_failed;
387 } else {
388 return (error);
389 }
390 }
391 if (diskPtr->status == rf_ds_optimal) {
392
393 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
394 return (error);
395 }
396 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
397 FREAD, proc->p_ucred, proc);
398 if (error) {
399 return (error);
400 }
401
402 diskPtr->blockSize = dpart.disklab->d_secsize;
403
404 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
405
406 raidPtr->raid_cinfo[row][col].ci_vp = vp;
407 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
408
409 diskPtr->dev = va.va_rdev;
410
411 /* we allow the user to specify that only a fraction of the
412 * disks should be used this is just for debug: it speeds up
413 * the parity scan */
414 diskPtr->numBlocks = diskPtr->numBlocks *
415 rf_sizePercentage / 100;
416 }
417 return (0);
418 }
419
420 /*
421
422 rf_CheckLabels() - check all the component labels for consistency.
423 Return an error if there is anything major amiss.
424
425 */
426
427 int
428 rf_CheckLabels( raidPtr, cfgPtr )
429 RF_Raid_t *raidPtr;
430 RF_Config_t *cfgPtr;
431 {
432 int r,c;
433 char *dev_name;
434 RF_ComponentLabel_t *ci_label;
435 int version = 0;
436 int serial_number = 0;
437 int mod_counter = 0;
438 int fatal_error = 0;
439 int disk_num = 0;
440
441 for (r = 0; r < raidPtr->numRow; r++) {
442 for (c = 0; c < raidPtr->numCol; c++) {
443 dev_name = &cfgPtr->devnames[r][c][0];
444 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
445
446 printf("Component label for %s being configured at row: %d col: %d\n", dev_name, r, c );
447 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", ci_label->row, ci_label->column,
448 ci_label->num_rows, ci_label->num_columns);
449 printf(" Version: %d Serial Number: %d Clean: %d Status: %d\n", ci_label->version, ci_label->serial_number,
450 ci_label->clean, ci_label->status );
451
452 if ( r !=0 && c != 0) {
453 if (serial_number != ci_label->serial_number) {
454 printf("%s has a different %s\n",
455 dev_name, "serial number!");
456 fatal_error = 1;
457 }
458 if (version != ci_label->version) {
459 printf("%s has a different %s\n",
460 dev_name, "version!");
461 fatal_error = 1;
462 }
463 if (mod_counter != ci_label->mod_counter) {
464 printf("%s has a different modfication count!\n",dev_name);
465 }
466 } else {
467 serial_number = ci_label->serial_number;
468 version = ci_label->version;
469 mod_counter = ci_label->mod_counter;
470 }
471
472 if (r != ci_label->row) {
473 printf("Row out of alignment for: %s\n",
474 dev_name);
475 fatal_error = 1;
476 }
477 if (c != ci_label->column) {
478 printf("Column out of alignment for: %s\n",
479 dev_name);
480 fatal_error = 1;
481 }
482 if (raidPtr->numRow != ci_label->num_rows) {
483 printf("Number of rows do not match for: %s\n",
484 dev_name);
485 fatal_error = 1;
486 }
487 if (raidPtr->numCol != ci_label->num_columns) {
488 printf("Number of columns do not match for: %s\n",
489 dev_name);
490 fatal_error = 1;
491 }
492 if (ci_label->clean == 0) {
493 /* it's not clean, but it's not fatal */
494 printf("%s is not clean!\n", dev_name);
495 }
496 disk_num++;
497 }
498 }
499
500 return(fatal_error);
501 }
502
503
504 int rf_add_hot_spare(RF_Raid_t *, RF_HotSpare_t *);
505 int
506 rf_add_hot_spare(raidPtr, sparePtr)
507 RF_Raid_t *raidPtr;
508 RF_HotSpare_t *sparePtr;
509 {
510 RF_RaidDisk_t *disks;
511 int ret;
512 unsigned int bs;
513 int spare_number;
514
515 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
516 printf("Num col: %d\n",raidPtr->numCol);
517 if (raidPtr->numSpare >= RF_MAXSPARE) {
518 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
519 return(EINVAL);
520 }
521
522 /* the beginning of the spares... */
523 disks = &raidPtr->Disks[0][raidPtr->numCol];
524
525 spare_number = raidPtr->numSpare;
526
527 ret = rf_ConfigureDisk(raidPtr, sparePtr->spare_name,
528 &disks[spare_number], 0,
529 raidPtr->numCol + spare_number);
530
531 if (ret)
532 goto fail;
533 if (disks[spare_number].status != rf_ds_optimal) {
534 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
535 sparePtr->spare_name);
536 ret=EINVAL;
537 goto fail;
538 } else {
539 disks[spare_number].status = rf_ds_spare;
540 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
541 disks[spare_number].devname,
542 (long int) disks[spare_number].numBlocks,
543 disks[spare_number].blockSize,
544 (long int) disks[spare_number].numBlocks *
545 disks[spare_number].blockSize / 1024 / 1024);
546 }
547
548
549 /* check sizes and block sizes on the spare disk */
550 bs = 1 << raidPtr->logBytesPerSector;
551 if (disks[spare_number].blockSize != bs) {
552 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
553 ret = EINVAL;
554 goto fail;
555 }
556 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
557 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
558 disks[spare_number].devname,
559 disks[spare_number].blockSize,
560 (long int) raidPtr->sectorsPerDisk);
561 ret = EINVAL;
562 goto fail;
563 } else {
564 if (disks[spare_number].numBlocks >
565 raidPtr->sectorsPerDisk) {
566 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
567 (long int) raidPtr->sectorsPerDisk);
568
569 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
570 }
571 }
572
573 raidPtr->numSpare++;
574
575 return (0);
576
577 fail:
578 return(ret);
579 }
580
581 int
582 rf_remove_hot_spare(raidPtr,sparePtr)
583 RF_Raid_t *raidPtr;
584 RF_HotSpare_t *sparePtr;
585 {
586 int spare_number;
587
588
589 if (raidPtr->numSpare==0) {
590 printf("No spares to remove!\n");
591 return(EINVAL);
592 }
593
594 spare_number = sparePtr->spare_number;
595
596 return(EINVAL); /* XXX not implemented yet */
597 #if 0
598 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
599 return(EINVAL);
600 }
601
602 /* verify that this spare isn't in use... */
603
604
605
606
607 /* it's gone.. */
608
609 raidPtr->numSpare--;
610
611 return(0);
612 #endif
613 }
614
615
616