rf_disks.c revision 1.23 1 /* $NetBSD: rf_disks.c,v 1.23 2000/03/07 02:28:05 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
90
/*
 * Debug print helpers: forward the arguments to printf() only when
 * rf_diskDebug is non-zero.  Each expansion is wrapped in
 * do { ... } while (0) so that it behaves as a single statement and
 * cannot capture an `else' that follows the macro invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
93
94 /**************************************************************************
95 *
96 * initialize the disks comprising the array
97 *
98 * We want the spare disks to have regular row,col numbers so that we can
99 * easily substitue a spare for a failed disk. But, the driver code assumes
100 * throughout that the array contains numRow by numCol _non-spare_ disks, so
101 * it's not clear how to fit in the spares. This is an unfortunate holdover
102 * from raidSim. The quick and dirty fix is to make row zero bigger than the
103 * rest, and put all the spares in it. This probably needs to get changed
104 * eventually.
105 *
106 **************************************************************************/
107
108 int
109 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
110 RF_ShutdownList_t **listp;
111 RF_Raid_t *raidPtr;
112 RF_Config_t *cfgPtr;
113 {
114 RF_RaidDisk_t **disks;
115 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
116 RF_RowCol_t r, c;
117 int bs, ret;
118 unsigned i, count, foundone = 0, numFailuresThisRow;
119 int force;
120
121 force = cfgPtr->force;
122
123 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
124 if (ret)
125 goto fail;
126
127 disks = raidPtr->Disks;
128
129 for (r = 0; r < raidPtr->numRow; r++) {
130 numFailuresThisRow = 0;
131 for (c = 0; c < raidPtr->numCol; c++) {
132 ret = rf_ConfigureDisk(raidPtr,
133 &cfgPtr->devnames[r][c][0],
134 &disks[r][c], r, c);
135
136 if (ret)
137 goto fail;
138
139 if (disks[r][c].status == rf_ds_optimal) {
140 raidread_component_label(
141 raidPtr->raid_cinfo[r][c].ci_dev,
142 raidPtr->raid_cinfo[r][c].ci_vp,
143 &raidPtr->raid_cinfo[r][c].ci_label);
144 }
145
146 if (disks[r][c].status != rf_ds_optimal) {
147 numFailuresThisRow++;
148 } else {
149 if (disks[r][c].numBlocks < min_numblks)
150 min_numblks = disks[r][c].numBlocks;
151 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
152 r, c, disks[r][c].devname,
153 (long int) disks[r][c].numBlocks,
154 disks[r][c].blockSize,
155 (long int) disks[r][c].numBlocks *
156 disks[r][c].blockSize / 1024 / 1024);
157 }
158 }
159 /* XXX fix for n-fault tolerant */
160 /* XXX this should probably check to see how many failures
161 we can handle for this configuration! */
162 if (numFailuresThisRow > 0)
163 raidPtr->status[r] = rf_rs_degraded;
164 }
165
166 /* all disks must be the same size & have the same block size, bs must
167 * be a power of 2 */
168 bs = 0;
169 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
170 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
171 if (disks[r][c].status == rf_ds_optimal) {
172 bs = disks[r][c].blockSize;
173 foundone = 1;
174 }
175 }
176 }
177 if (!foundone) {
178 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
179 ret = EINVAL;
180 goto fail;
181 }
182 for (count = 0, i = 1; i; i <<= 1)
183 if (bs & i)
184 count++;
185 if (count != 1) {
186 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
187 ret = EINVAL;
188 goto fail;
189 }
190
191 if (rf_CheckLabels( raidPtr, cfgPtr )) {
192 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
193 if (force != 0) {
194 printf("raid%d: Fatal errors being ignored.\n",
195 raidPtr->raidid);
196 } else {
197 ret = EINVAL;
198 goto fail;
199 }
200 }
201
202 for (r = 0; r < raidPtr->numRow; r++) {
203 for (c = 0; c < raidPtr->numCol; c++) {
204 if (disks[r][c].status == rf_ds_optimal) {
205 if (disks[r][c].blockSize != bs) {
206 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
207 ret = EINVAL;
208 goto fail;
209 }
210 if (disks[r][c].numBlocks != min_numblks) {
211 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
212 r, c, (int) min_numblks);
213 disks[r][c].numBlocks = min_numblks;
214 }
215 }
216 }
217 }
218
219 raidPtr->sectorsPerDisk = min_numblks;
220 raidPtr->logBytesPerSector = ffs(bs) - 1;
221 raidPtr->bytesPerSector = bs;
222 raidPtr->sectorMask = bs - 1;
223 return (0);
224
225 fail:
226
227 rf_UnconfigureVnodes( raidPtr );
228
229 return (ret);
230 }
231
232
233 /****************************************************************************
234 * set up the data structures describing the spare disks in the array
235 * recall from the above comment that the spare disk descriptors are stored
236 * in row zero, which is specially expanded to hold them.
237 ****************************************************************************/
238 int
239 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
240 RF_ShutdownList_t ** listp;
241 RF_Raid_t * raidPtr;
242 RF_Config_t * cfgPtr;
243 {
244 int i, ret;
245 unsigned int bs;
246 RF_RaidDisk_t *disks;
247 int num_spares_done;
248
249 num_spares_done = 0;
250
251 /* The space for the spares should have already been allocated by
252 * ConfigureDisks() */
253
254 disks = &raidPtr->Disks[0][raidPtr->numCol];
255 for (i = 0; i < raidPtr->numSpare; i++) {
256 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
257 &disks[i], 0, raidPtr->numCol + i);
258 if (ret)
259 goto fail;
260 if (disks[i].status != rf_ds_optimal) {
261 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
262 &cfgPtr->spare_names[i][0]);
263 } else {
264 disks[i].status = rf_ds_spare; /* change status to
265 * spare */
266 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
267 disks[i].devname,
268 (long int) disks[i].numBlocks, disks[i].blockSize,
269 (long int) disks[i].numBlocks *
270 disks[i].blockSize / 1024 / 1024);
271 }
272 num_spares_done++;
273 }
274
275 /* check sizes and block sizes on spare disks */
276 bs = 1 << raidPtr->logBytesPerSector;
277 for (i = 0; i < raidPtr->numSpare; i++) {
278 if (disks[i].blockSize != bs) {
279 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
280 ret = EINVAL;
281 goto fail;
282 }
283 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
284 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
285 disks[i].devname, disks[i].blockSize,
286 (long int) raidPtr->sectorsPerDisk);
287 ret = EINVAL;
288 goto fail;
289 } else
290 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
291 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
292
293 disks[i].numBlocks = raidPtr->sectorsPerDisk;
294 }
295 }
296
297 return (0);
298
299 fail:
300
301 /* Release the hold on the main components. We've failed to allocate
302 * a spare, and since we're failing, we need to free things..
303
304 XXX failing to allocate a spare is *not* that big of a deal...
305 We *can* survive without it, if need be, esp. if we get hot
306 adding working.
307
308 If we don't fail out here, then we need a way to remove this spare...
309 that should be easier to do here than if we are "live"...
310
311 */
312
313 rf_UnconfigureVnodes( raidPtr );
314
315 return (ret);
316 }
317
318 static int
319 rf_AllocDiskStructures(raidPtr, cfgPtr)
320 RF_Raid_t *raidPtr;
321 RF_Config_t *cfgPtr;
322 {
323 RF_RaidDisk_t **disks;
324 int ret;
325 int r;
326
327 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
328 (RF_RaidDisk_t **), raidPtr->cleanupList);
329 if (disks == NULL) {
330 ret = ENOMEM;
331 goto fail;
332 }
333 raidPtr->Disks = disks;
334 /* get space for the device-specific stuff... */
335 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
336 sizeof(struct raidcinfo *), (struct raidcinfo **),
337 raidPtr->cleanupList);
338 if (raidPtr->raid_cinfo == NULL) {
339 ret = ENOMEM;
340 goto fail;
341 }
342
343 for (r = 0; r < raidPtr->numRow; r++) {
344 /* We allocate RF_MAXSPARE on the first row so that we
345 have room to do hot-swapping of spares */
346 RF_CallocAndAdd(disks[r], raidPtr->numCol
347 + ((r == 0) ? RF_MAXSPARE : 0),
348 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
349 raidPtr->cleanupList);
350 if (disks[r] == NULL) {
351 ret = ENOMEM;
352 goto fail;
353 }
354 /* get more space for device specific stuff.. */
355 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
356 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
357 sizeof(struct raidcinfo), (struct raidcinfo *),
358 raidPtr->cleanupList);
359 if (raidPtr->raid_cinfo[r] == NULL) {
360 ret = ENOMEM;
361 goto fail;
362 }
363 }
364 return(0);
365 fail:
366 rf_UnconfigureVnodes( raidPtr );
367
368 return(ret);
369 }
370
371
372 /* configure a single disk during auto-configuration at boot */
373 int
374 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
375 RF_Raid_t *raidPtr;
376 RF_Config_t *cfgPtr;
377 RF_AutoConfig_t *auto_config;
378 {
379 RF_RaidDisk_t **disks;
380 RF_RaidDisk_t *diskPtr;
381 RF_RowCol_t r, c;
382 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
383 int bs, ret;
384 int numFailuresThisRow;
385 int force;
386 RF_AutoConfig_t *ac;
387 int parity_good;
388 int mod_counter;
389
390 #if DEBUG
391 printf("Starting autoconfiguration of RAID set...\n");
392 #endif
393 force = cfgPtr->force;
394
395 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
396 if (ret)
397 goto fail;
398
399 disks = raidPtr->Disks;
400
401 /* assume the parity will be fine.. */
402 parity_good = RF_RAID_CLEAN;
403
404 /* Check for mod_counters that are too low */
405 mod_counter = -1;
406 ac = auto_config;
407 while(ac!=NULL) {
408 if (ac->clabel->mod_counter > mod_counter) {
409 mod_counter = ac->clabel->mod_counter;
410 }
411 ac = ac->next;
412 }
413 if (mod_counter == -1) {
414 /* mod_counters were all negative!?!?!?
415 Ok, we can deal with that. */
416 #if 0
417 ac = auto_config;
418 while(ac!=NULL) {
419 if (ac->clabel->mod_counter > mod_counter) {
420 mod_counter = ac->clabel->mod_counter;
421 }
422 ac = ac->next;
423 }
424 #endif
425 }
426
427 for (r = 0; r < raidPtr->numRow; r++) {
428 numFailuresThisRow = 0;
429 for (c = 0; c < raidPtr->numCol; c++) {
430 diskPtr = &disks[r][c];
431
432 /* find this row/col in the autoconfig */
433 #if DEBUG
434 printf("Looking for %d,%d in autoconfig\n",r,c);
435 #endif
436 ac = auto_config;
437 while(ac!=NULL) {
438 if (ac->clabel==NULL) {
439 /* big-time bad news. */
440 goto fail;
441 }
442 if ((ac->clabel->row == r) &&
443 (ac->clabel->column == c)) {
444 /* it's this one... */
445 #if DEBUG
446 printf("Found: %s at %d,%d\n",
447 ac->devname,r,c);
448 #endif
449
450 break;
451 }
452 ac=ac->next;
453 }
454
455 if (ac!=NULL) {
456 /* Found it. Configure it.. */
457 diskPtr->blockSize = ac->clabel->blockSize;
458 diskPtr->numBlocks = ac->clabel->numBlocks;
459 /* Note: rf_protectedSectors is already
460 factored into numBlocks here */
461 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
462 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
463
464 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
465 ac->clabel, sizeof(*ac->clabel));
466 sprintf(diskPtr->devname, "/dev/%s",
467 ac->devname);
468
469 /* note the fact that this component was
470 autoconfigured. You'll need this info
471 later. Trust me :) */
472 diskPtr->auto_configured = 1;
473 diskPtr->dev = ac->dev;
474
475 /*
476 * we allow the user to specify that
477 * only a fraction of the disks should
478 * be used this is just for debug: it
479 * speeds up the parity scan
480 */
481
482 diskPtr->numBlocks = diskPtr->numBlocks *
483 rf_sizePercentage / 100;
484
485 /* XXX these will get set multiple times,
486 but since we're autoconfiguring, they'd
487 better be always the same each time!
488 If not, this is the least of your worries */
489
490 bs = diskPtr->blockSize;
491 min_numblks = diskPtr->numBlocks;
492
493 /* this gets done multiple times, but that's
494 fine -- the serial number will be the same
495 for all components, guaranteed */
496 raidPtr->serial_number =
497 ac->clabel->serial_number;
498 /* check the last time the label
499 was modified */
500 if (ac->clabel->mod_counter !=
501 mod_counter) {
502 /* Even though we've filled in all
503 of the above, we don't trust
504 this component since it's
505 modification counter is not
506 in sync with the rest, and we really
507 consider it to be failed. */
508 disks[r][c].status = rf_ds_failed;
509 numFailuresThisRow++;
510 } else {
511 if (ac->clabel->clean !=
512 RF_RAID_CLEAN) {
513 parity_good = RF_RAID_DIRTY;
514 }
515 }
516 } else {
517 /* Didn't find it at all!!
518 Component must really be dead */
519 disks[r][c].status = rf_ds_failed;
520 numFailuresThisRow++;
521 }
522 }
523 /* XXX fix for n-fault tolerant */
524 /* XXX this should probably check to see how many failures
525 we can handle for this configuration! */
526 if (numFailuresThisRow > 0)
527 raidPtr->status[r] = rf_rs_degraded;
528 }
529
530 raidPtr->mod_counter = mod_counter;
531
532 /* note the state of the parity, if any */
533 raidPtr->parity_good = parity_good;
534 raidPtr->sectorsPerDisk = min_numblks;
535 raidPtr->logBytesPerSector = ffs(bs) - 1;
536 raidPtr->bytesPerSector = bs;
537 raidPtr->sectorMask = bs - 1;
538 return (0);
539
540 fail:
541
542 rf_UnconfigureVnodes( raidPtr );
543
544 return (ret);
545
546 }
547
548 /* configure a single disk in the array */
549 int
550 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
551 RF_Raid_t *raidPtr;
552 char *buf;
553 RF_RaidDisk_t *diskPtr;
554 RF_RowCol_t row;
555 RF_RowCol_t col;
556 {
557 char *p;
558 int retcode;
559
560 struct partinfo dpart;
561 struct vnode *vp;
562 struct vattr va;
563 struct proc *proc;
564 int error;
565
566 retcode = 0;
567 p = rf_find_non_white(buf);
568 if (p[strlen(p) - 1] == '\n') {
569 /* strip off the newline */
570 p[strlen(p) - 1] = '\0';
571 }
572 (void) strcpy(diskPtr->devname, p);
573
574 proc = raidPtr->engine_thread;
575
576 /* Let's start by claiming the component is fine and well... */
577 diskPtr->status = rf_ds_optimal;
578
579 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
580 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
581
582 error = raidlookup(diskPtr->devname, proc, &vp);
583 if (error) {
584 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
585 if (error == ENXIO) {
586 /* the component isn't there... must be dead :-( */
587 diskPtr->status = rf_ds_failed;
588 } else {
589 return (error);
590 }
591 }
592 if (diskPtr->status == rf_ds_optimal) {
593
594 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
595 return (error);
596 }
597 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
598 FREAD, proc->p_ucred, proc);
599 if (error) {
600 return (error);
601 }
602
603 diskPtr->blockSize = dpart.disklab->d_secsize;
604
605 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
606 diskPtr->partitionSize = dpart.part->p_size;
607
608 raidPtr->raid_cinfo[row][col].ci_vp = vp;
609 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
610
611 /* This component was not automatically configured */
612 diskPtr->auto_configured = 0;
613 diskPtr->dev = va.va_rdev;
614
615 /* we allow the user to specify that only a fraction of the
616 * disks should be used this is just for debug: it speeds up
617 * the parity scan */
618 diskPtr->numBlocks = diskPtr->numBlocks *
619 rf_sizePercentage / 100;
620 }
621 return (0);
622 }
623
624 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
625 RF_ComponentLabel_t *);
626
627 static void
628 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
629 RF_Raid_t *raidPtr;
630 int row;
631 int column;
632 char *dev_name;
633 RF_ComponentLabel_t *ci_label;
634 {
635
636 printf("raid%d: Component %s being configured at row: %d col: %d\n",
637 raidPtr->raidid, dev_name, row, column );
638 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
639 ci_label->row, ci_label->column,
640 ci_label->num_rows, ci_label->num_columns);
641 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
642 ci_label->version, ci_label->serial_number,
643 ci_label->mod_counter);
644 printf(" Clean: %s Status: %d\n",
645 ci_label->clean ? "Yes" : "No", ci_label->status );
646 }
647
648 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
649 RF_ComponentLabel_t *, int, int );
650 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
651 serial_number, mod_counter )
652 RF_Raid_t *raidPtr;
653 int row;
654 int column;
655 char *dev_name;
656 RF_ComponentLabel_t *ci_label;
657 int serial_number;
658 int mod_counter;
659 {
660 int fatal_error = 0;
661
662 if (serial_number != ci_label->serial_number) {
663 printf("%s has a different serial number: %d %d\n",
664 dev_name, serial_number, ci_label->serial_number);
665 fatal_error = 1;
666 }
667 if (mod_counter != ci_label->mod_counter) {
668 printf("%s has a different modfication count: %d %d\n",
669 dev_name, mod_counter, ci_label->mod_counter);
670 }
671
672 if (row != ci_label->row) {
673 printf("Row out of alignment for: %s\n", dev_name);
674 fatal_error = 1;
675 }
676 if (column != ci_label->column) {
677 printf("Column out of alignment for: %s\n", dev_name);
678 fatal_error = 1;
679 }
680 if (raidPtr->numRow != ci_label->num_rows) {
681 printf("Number of rows do not match for: %s\n", dev_name);
682 fatal_error = 1;
683 }
684 if (raidPtr->numCol != ci_label->num_columns) {
685 printf("Number of columns do not match for: %s\n", dev_name);
686 fatal_error = 1;
687 }
688 if (ci_label->clean == 0) {
689 /* it's not clean, but that's not fatal */
690 printf("%s is not clean!\n", dev_name);
691 }
692 return(fatal_error);
693 }
694
695
696 /*
697
698 rf_CheckLabels() - check all the component labels for consistency.
699 Return an error if there is anything major amiss.
700
701 */
702
703 int
704 rf_CheckLabels( raidPtr, cfgPtr )
705 RF_Raid_t *raidPtr;
706 RF_Config_t *cfgPtr;
707 {
708 int r,c;
709 char *dev_name;
710 RF_ComponentLabel_t *ci_label;
711 int serial_number = 0;
712 int mod_number = 0;
713 int fatal_error = 0;
714 int mod_values[4];
715 int mod_count[4];
716 int ser_values[4];
717 int ser_count[4];
718 int num_ser;
719 int num_mod;
720 int i;
721 int found;
722 int hosed_row;
723 int hosed_column;
724 int too_fatal;
725 int parity_good;
726 int force;
727
728 hosed_row = -1;
729 hosed_column = -1;
730 too_fatal = 0;
731 force = cfgPtr->force;
732
733 /*
734 We're going to try to be a little intelligent here. If one
735 component's label is bogus, and we can identify that it's the
736 *only* one that's gone, we'll mark it as "failed" and allow
737 the configuration to proceed. This will be the *only* case
738 that we'll proceed if there would be (otherwise) fatal errors.
739
740 Basically we simply keep a count of how many components had
741 what serial number. If all but one agree, we simply mark
742 the disagreeing component as being failed, and allow
743 things to come up "normally".
744
745 We do this first for serial numbers, and then for "mod_counter".
746
747 */
748
749 num_ser = 0;
750 num_mod = 0;
751 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
752 for (c = 0; c < raidPtr->numCol; c++) {
753 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
754 found=0;
755 for(i=0;i<num_ser;i++) {
756 if (ser_values[i] == ci_label->serial_number) {
757 ser_count[i]++;
758 found=1;
759 break;
760 }
761 }
762 if (!found) {
763 ser_values[num_ser] = ci_label->serial_number;
764 ser_count[num_ser] = 1;
765 num_ser++;
766 if (num_ser>2) {
767 fatal_error = 1;
768 break;
769 }
770 }
771 found=0;
772 for(i=0;i<num_mod;i++) {
773 if (mod_values[i] == ci_label->mod_counter) {
774 mod_count[i]++;
775 found=1;
776 break;
777 }
778 }
779 if (!found) {
780 mod_values[num_mod] = ci_label->mod_counter;
781 mod_count[num_mod] = 1;
782 num_mod++;
783 if (num_mod>2) {
784 fatal_error = 1;
785 break;
786 }
787 }
788 }
789 }
790 #if DEBUG
791 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
792 for(i=0;i<num_ser;i++) {
793 printf("%d %d\n", ser_values[i], ser_count[i]);
794 }
795 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
796 for(i=0;i<num_mod;i++) {
797 printf("%d %d\n", mod_values[i], mod_count[i]);
798 }
799 #endif
800 serial_number = ser_values[0];
801 if (num_ser == 2) {
802 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
803 /* Locate the maverick component */
804 if (ser_count[1] > ser_count[0]) {
805 serial_number = ser_values[1];
806 }
807 for (r = 0; r < raidPtr->numRow; r++) {
808 for (c = 0; c < raidPtr->numCol; c++) {
809 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
810 if (serial_number !=
811 ci_label->serial_number) {
812 hosed_row = r;
813 hosed_column = c;
814 break;
815 }
816 }
817 }
818 printf("Hosed component: %s\n",
819 &cfgPtr->devnames[hosed_row][hosed_column][0]);
820 if (!force) {
821 /* we'll fail this component, as if there are
822 other major errors, we arn't forcing things
823 and we'll abort the config anyways */
824 raidPtr->Disks[hosed_row][hosed_column].status
825 = rf_ds_failed;
826 raidPtr->numFailures++;
827 raidPtr->status[hosed_row] = rf_rs_degraded;
828 }
829 } else {
830 too_fatal = 1;
831 }
832 if (cfgPtr->parityConfig == '0') {
833 /* We've identified two different serial numbers.
834 RAID 0 can't cope with that, so we'll punt */
835 too_fatal = 1;
836 }
837
838 }
839
840 /* record the serial number for later. If we bail later, setting
841 this doesn't matter, otherwise we've got the best guess at the
842 correct serial number */
843 raidPtr->serial_number = serial_number;
844
845 mod_number = mod_values[0];
846 if (num_mod == 2) {
847 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
848 /* Locate the maverick component */
849 if (mod_count[1] > mod_count[0]) {
850 mod_number = mod_values[1];
851 } else if (mod_count[1] < mod_count[0]) {
852 mod_number = mod_values[0];
853 } else {
854 /* counts of different modification values
855 are the same. Assume greater value is
856 the correct one, all other things
857 considered */
858 if (mod_values[0] > mod_values[1]) {
859 mod_number = mod_values[0];
860 } else {
861 mod_number = mod_values[1];
862 }
863
864 }
865 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
866 for (c = 0; c < raidPtr->numCol; c++) {
867 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
868 if (mod_number !=
869 ci_label->mod_counter) {
870 if ( ( hosed_row == r ) &&
871 ( hosed_column == c )) {
872 /* same one. Can
873 deal with it. */
874 } else {
875 hosed_row = r;
876 hosed_column = c;
877 if (num_ser != 1) {
878 too_fatal = 1;
879 break;
880 }
881 }
882 }
883 }
884 }
885 printf("Hosed component: %s\n",
886 &cfgPtr->devnames[hosed_row][hosed_column][0]);
887 if (!force) {
888 /* we'll fail this component, as if there are
889 other major errors, we arn't forcing things
890 and we'll abort the config anyways */
891 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
892 raidPtr->Disks[hosed_row][hosed_column].status
893 = rf_ds_failed;
894 raidPtr->numFailures++;
895 raidPtr->status[hosed_row] = rf_rs_degraded;
896 }
897 }
898 } else {
899 too_fatal = 1;
900 }
901 if (cfgPtr->parityConfig == '0') {
902 /* We've identified two different mod counters.
903 RAID 0 can't cope with that, so we'll punt */
904 too_fatal = 1;
905 }
906 }
907
908 raidPtr->mod_counter = mod_number;
909
910 if (too_fatal) {
911 /* we've had both a serial number mismatch, and a mod_counter
912 mismatch -- and they involved two different components!!
913 Bail -- make things fail so that the user must force
914 the issue... */
915 hosed_row = -1;
916 hosed_column = -1;
917 }
918
919 if (num_ser > 2) {
920 printf("raid%d: Too many different serial numbers!\n",
921 raidPtr->raidid);
922 }
923
924 if (num_mod > 2) {
925 printf("raid%d: Too many different mod counters!\n",
926 raidPtr->raidid);
927 }
928
929 /* we start by assuming the parity will be good, and flee from
930 that notion at the slightest sign of trouble */
931
932 parity_good = RF_RAID_CLEAN;
933 for (r = 0; r < raidPtr->numRow; r++) {
934 for (c = 0; c < raidPtr->numCol; c++) {
935 dev_name = &cfgPtr->devnames[r][c][0];
936 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
937
938 if ((r == hosed_row) && (c == hosed_column)) {
939 printf("raid%d: Ignoring %s\n",
940 raidPtr->raidid, dev_name);
941 } else {
942 rf_print_label_status( raidPtr, r, c,
943 dev_name, ci_label );
944 if (rf_check_label_vitals( raidPtr, r, c,
945 dev_name, ci_label,
946 serial_number,
947 mod_number )) {
948 fatal_error = 1;
949 }
950 if (ci_label->clean != RF_RAID_CLEAN) {
951 parity_good = RF_RAID_DIRTY;
952 }
953 }
954 }
955 }
956 if (fatal_error) {
957 parity_good = RF_RAID_DIRTY;
958 }
959
960 /* we note the state of the parity */
961 raidPtr->parity_good = parity_good;
962
963 return(fatal_error);
964 }
965
966 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
967 int
968 rf_add_hot_spare(raidPtr, sparePtr)
969 RF_Raid_t *raidPtr;
970 RF_SingleComponent_t *sparePtr;
971 {
972 RF_RaidDisk_t *disks;
973 RF_DiskQueue_t *spareQueues;
974 int ret;
975 unsigned int bs;
976 int spare_number;
977
978 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
979 printf("Num col: %d\n",raidPtr->numCol);
980 if (raidPtr->numSpare >= RF_MAXSPARE) {
981 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
982 return(EINVAL);
983 }
984
985 RF_LOCK_MUTEX(raidPtr->mutex);
986
987 /* the beginning of the spares... */
988 disks = &raidPtr->Disks[0][raidPtr->numCol];
989
990 spare_number = raidPtr->numSpare;
991
992 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
993 &disks[spare_number], 0,
994 raidPtr->numCol + spare_number);
995
996 if (ret)
997 goto fail;
998 if (disks[spare_number].status != rf_ds_optimal) {
999 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1000 sparePtr->component_name);
1001 ret=EINVAL;
1002 goto fail;
1003 } else {
1004 disks[spare_number].status = rf_ds_spare;
1005 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1006 disks[spare_number].devname,
1007 (long int) disks[spare_number].numBlocks,
1008 disks[spare_number].blockSize,
1009 (long int) disks[spare_number].numBlocks *
1010 disks[spare_number].blockSize / 1024 / 1024);
1011 }
1012
1013
1014 /* check sizes and block sizes on the spare disk */
1015 bs = 1 << raidPtr->logBytesPerSector;
1016 if (disks[spare_number].blockSize != bs) {
1017 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1018 ret = EINVAL;
1019 goto fail;
1020 }
1021 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1022 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1023 disks[spare_number].devname,
1024 disks[spare_number].blockSize,
1025 (long int) raidPtr->sectorsPerDisk);
1026 ret = EINVAL;
1027 goto fail;
1028 } else {
1029 if (disks[spare_number].numBlocks >
1030 raidPtr->sectorsPerDisk) {
1031 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1032 (long int) raidPtr->sectorsPerDisk);
1033
1034 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1035 }
1036 }
1037
1038 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1039 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1040 0, raidPtr->numCol + spare_number,
1041 raidPtr->qType,
1042 raidPtr->sectorsPerDisk,
1043 raidPtr->Disks[0][raidPtr->numCol +
1044 spare_number].dev,
1045 raidPtr->maxOutstanding,
1046 &raidPtr->shutdownList,
1047 raidPtr->cleanupList);
1048
1049
1050 raidPtr->numSpare++;
1051 RF_UNLOCK_MUTEX(raidPtr->mutex);
1052 return (0);
1053
1054 fail:
1055 RF_UNLOCK_MUTEX(raidPtr->mutex);
1056 return(ret);
1057 }
1058
1059 int
1060 rf_remove_hot_spare(raidPtr,sparePtr)
1061 RF_Raid_t *raidPtr;
1062 RF_SingleComponent_t *sparePtr;
1063 {
1064 int spare_number;
1065
1066
1067 if (raidPtr->numSpare==0) {
1068 printf("No spares to remove!\n");
1069 return(EINVAL);
1070 }
1071
1072 spare_number = sparePtr->column;
1073
1074 return(EINVAL); /* XXX not implemented yet */
1075 #if 0
1076 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1077 return(EINVAL);
1078 }
1079
1080 /* verify that this spare isn't in use... */
1081
1082
1083
1084
1085 /* it's gone.. */
1086
1087 raidPtr->numSpare--;
1088
1089 return(0);
1090 #endif
1091 }
1092
1093
1094