rf_disks.c revision 1.69.10.2 1 /* $NetBSD: rf_disks.c,v 1.69.10.2 2009/05/04 08:13:16 yamt Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.69.10.2 2009/05/04 08:13:16 yamt Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 RF_ComponentLabel_t *, int, int );
88
/*
 * Debug tracing helpers: hand their arguments to printf() only while
 * rf_diskDebug is non-zero.  The do { } while (0) wrapper makes each
 * expansion a single statement, so a use such as
 *	if (x) DPRINTF6(...); else ...
 * keeps the else attached to the caller's if rather than to the macro's.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91
92 /**************************************************************************
93 *
94 * initialize the disks comprising the array
95 *
96 * We want the spare disks to have regular row,col numbers so that we can
97 * easily substitute a spare for a failed disk. But, the driver code assumes
98 * throughout that the array contains numRow by numCol _non-spare_ disks, so
99 * it's not clear how to fit in the spares. This is an unfortunate holdover
100 * from raidSim. The quick and dirty fix is to make row zero bigger than the
101 * rest, and put all the spares in it. This probably needs to get changed
102 * eventually.
103 *
104 **************************************************************************/
105
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 RF_Config_t *cfgPtr)
109 {
110 RF_RaidDisk_t *disks;
111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 RF_RowCol_t c;
113 int bs, ret;
114 unsigned i, count, foundone = 0, numFailuresThisRow;
115 int force;
116
117 force = cfgPtr->force;
118
119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 if (ret)
121 goto fail;
122
123 disks = raidPtr->Disks;
124
125 numFailuresThisRow = 0;
126 for (c = 0; c < raidPtr->numCol; c++) {
127 ret = rf_ConfigureDisk(raidPtr,
128 &cfgPtr->devnames[0][c][0],
129 &disks[c], c);
130
131 if (ret)
132 goto fail;
133
134 if (disks[c].status == rf_ds_optimal) {
135 raidread_component_label(
136 raidPtr->raid_cinfo[c].ci_dev,
137 raidPtr->raid_cinfo[c].ci_vp,
138 &raidPtr->raid_cinfo[c].ci_label);
139 }
140
141 if (disks[c].status != rf_ds_optimal) {
142 numFailuresThisRow++;
143 } else {
144 if (disks[c].numBlocks < min_numblks)
145 min_numblks = disks[c].numBlocks;
146 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
147 c, disks[c].devname,
148 disks[c].numBlocks,
149 disks[c].blockSize,
150 (long int) disks[c].numBlocks *
151 disks[c].blockSize / 1024 / 1024);
152 }
153 }
154 /* XXX fix for n-fault tolerant */
155 /* XXX this should probably check to see how many failures
156 we can handle for this configuration! */
157 if (numFailuresThisRow > 0)
158 raidPtr->status = rf_rs_degraded;
159
160 /* all disks must be the same size & have the same block size, bs must
161 * be a power of 2 */
162 bs = 0;
163 foundone = 0;
164 for (c = 0; c < raidPtr->numCol; c++) {
165 if (disks[c].status == rf_ds_optimal) {
166 bs = disks[c].blockSize;
167 foundone = 1;
168 break;
169 }
170 }
171 if (!foundone) {
172 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
173 ret = EINVAL;
174 goto fail;
175 }
176 for (count = 0, i = 1; i; i <<= 1)
177 if (bs & i)
178 count++;
179 if (count != 1) {
180 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
181 ret = EINVAL;
182 goto fail;
183 }
184
185 if (rf_CheckLabels( raidPtr, cfgPtr )) {
186 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
187 if (force != 0) {
188 printf("raid%d: Fatal errors being ignored.\n",
189 raidPtr->raidid);
190 } else {
191 ret = EINVAL;
192 goto fail;
193 }
194 }
195
196 for (c = 0; c < raidPtr->numCol; c++) {
197 if (disks[c].status == rf_ds_optimal) {
198 if (disks[c].blockSize != bs) {
199 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
200 ret = EINVAL;
201 goto fail;
202 }
203 if (disks[c].numBlocks != min_numblks) {
204 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
205 c, (int) min_numblks);
206 disks[c].numBlocks = min_numblks;
207 }
208 }
209 }
210
211 raidPtr->sectorsPerDisk = min_numblks;
212 raidPtr->logBytesPerSector = ffs(bs) - 1;
213 raidPtr->bytesPerSector = bs;
214 raidPtr->sectorMask = bs - 1;
215 return (0);
216
217 fail:
218
219 rf_UnconfigureVnodes( raidPtr );
220
221 return (ret);
222 }
223
224
225 /****************************************************************************
226 * set up the data structures describing the spare disks in the array
227 * recall from the above comment that the spare disk descriptors are stored
228 * in row zero, which is specially expanded to hold them.
229 ****************************************************************************/
230 int
231 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
232 RF_Config_t *cfgPtr)
233 {
234 int i, ret;
235 unsigned int bs;
236 RF_RaidDisk_t *disks;
237 int num_spares_done;
238
239 num_spares_done = 0;
240
241 /* The space for the spares should have already been allocated by
242 * ConfigureDisks() */
243
244 disks = &raidPtr->Disks[raidPtr->numCol];
245 for (i = 0; i < raidPtr->numSpare; i++) {
246 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
247 &disks[i], raidPtr->numCol + i);
248 if (ret)
249 goto fail;
250 if (disks[i].status != rf_ds_optimal) {
251 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
252 &cfgPtr->spare_names[i][0]);
253 } else {
254 disks[i].status = rf_ds_spare; /* change status to
255 * spare */
256 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
257 disks[i].devname,
258 disks[i].numBlocks, disks[i].blockSize,
259 (long int) disks[i].numBlocks *
260 disks[i].blockSize / 1024 / 1024);
261 }
262 num_spares_done++;
263 }
264
265 /* check sizes and block sizes on spare disks */
266 bs = 1 << raidPtr->logBytesPerSector;
267 for (i = 0; i < raidPtr->numSpare; i++) {
268 if (disks[i].blockSize != bs) {
269 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
270 ret = EINVAL;
271 goto fail;
272 }
273 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
274 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
275 disks[i].devname, disks[i].blockSize,
276 raidPtr->sectorsPerDisk);
277 ret = EINVAL;
278 goto fail;
279 } else
280 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
281 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
282 disks[i].devname,
283 raidPtr->sectorsPerDisk,
284 disks[i].numBlocks);
285
286 disks[i].numBlocks = raidPtr->sectorsPerDisk;
287 }
288 }
289
290 return (0);
291
292 fail:
293
294 /* Release the hold on the main components. We've failed to allocate
295 * a spare, and since we're failing, we need to free things..
296
297 XXX failing to allocate a spare is *not* that big of a deal...
298 We *can* survive without it, if need be, esp. if we get hot
299 adding working.
300
301 If we don't fail out here, then we need a way to remove this spare...
302 that should be easier to do here than if we are "live"...
303
304 */
305
306 rf_UnconfigureVnodes( raidPtr );
307
308 return (ret);
309 }
310
311 static int
312 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
313 {
314 int ret;
315
316 /* We allocate RF_MAXSPARE on the first row so that we
317 have room to do hot-swapping of spares */
318 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
319 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
320 raidPtr->cleanupList);
321 if (raidPtr->Disks == NULL) {
322 ret = ENOMEM;
323 goto fail;
324 }
325
326 /* get space for device specific stuff.. */
327 RF_MallocAndAdd(raidPtr->raid_cinfo,
328 (raidPtr->numCol + RF_MAXSPARE) *
329 sizeof(struct raidcinfo), (struct raidcinfo *),
330 raidPtr->cleanupList);
331
332 if (raidPtr->raid_cinfo == NULL) {
333 ret = ENOMEM;
334 goto fail;
335 }
336
337 return(0);
338 fail:
339 rf_UnconfigureVnodes( raidPtr );
340
341 return(ret);
342 }
343
344
345 /* configure a single disk during auto-configuration at boot */
346 int
347 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
348 RF_AutoConfig_t *auto_config)
349 {
350 RF_RaidDisk_t *disks;
351 RF_RaidDisk_t *diskPtr;
352 RF_RowCol_t c;
353 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
354 int bs, ret;
355 int numFailuresThisRow;
356 RF_AutoConfig_t *ac;
357 int parity_good;
358 int mod_counter;
359 int mod_counter_found;
360
361 #if DEBUG
362 printf("Starting autoconfiguration of RAID set...\n");
363 #endif
364
365 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
366 if (ret)
367 goto fail;
368
369 disks = raidPtr->Disks;
370
371 /* assume the parity will be fine.. */
372 parity_good = RF_RAID_CLEAN;
373
374 /* Check for mod_counters that are too low */
375 mod_counter_found = 0;
376 mod_counter = 0;
377 ac = auto_config;
378 while(ac!=NULL) {
379 if (mod_counter_found==0) {
380 mod_counter = ac->clabel->mod_counter;
381 mod_counter_found = 1;
382 } else {
383 if (ac->clabel->mod_counter > mod_counter) {
384 mod_counter = ac->clabel->mod_counter;
385 }
386 }
387 ac->flag = 0; /* clear the general purpose flag */
388 ac = ac->next;
389 }
390
391 bs = 0;
392
393 numFailuresThisRow = 0;
394 for (c = 0; c < raidPtr->numCol; c++) {
395 diskPtr = &disks[c];
396
397 /* find this row/col in the autoconfig */
398 #if DEBUG
399 printf("Looking for %d in autoconfig\n",c);
400 #endif
401 ac = auto_config;
402 while(ac!=NULL) {
403 if (ac->clabel==NULL) {
404 /* big-time bad news. */
405 goto fail;
406 }
407 if ((ac->clabel->column == c) &&
408 (ac->clabel->mod_counter == mod_counter)) {
409 /* it's this one... */
410 /* flag it as 'used', so we don't
411 free it later. */
412 ac->flag = 1;
413 #if DEBUG
414 printf("Found: %s at %d\n",
415 ac->devname,c);
416 #endif
417
418 break;
419 }
420 ac=ac->next;
421 }
422
423 if (ac==NULL) {
424 /* we didn't find an exact match with a
425 correct mod_counter above... can we find
426 one with an incorrect mod_counter to use
427 instead? (this one, if we find it, will be
428 marked as failed once the set configures)
429 */
430
431 ac = auto_config;
432 while(ac!=NULL) {
433 if (ac->clabel==NULL) {
434 /* big-time bad news. */
435 goto fail;
436 }
437 if (ac->clabel->column == c) {
438 /* it's this one...
439 flag it as 'used', so we
440 don't free it later. */
441 ac->flag = 1;
442 #if DEBUG
443 printf("Found(low mod_counter): %s at %d\n",
444 ac->devname,c);
445 #endif
446
447 break;
448 }
449 ac=ac->next;
450 }
451 }
452
453
454
455 if (ac!=NULL) {
456 /* Found it. Configure it.. */
457 diskPtr->blockSize = ac->clabel->blockSize;
458 diskPtr->numBlocks = ac->clabel->numBlocks;
459 /* Note: rf_protectedSectors is already
460 factored into numBlocks here */
461 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
462 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
463
464 memcpy(&raidPtr->raid_cinfo[c].ci_label,
465 ac->clabel, sizeof(*ac->clabel));
466 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
467 "/dev/%s", ac->devname);
468
469 /* note the fact that this component was
470 autoconfigured. You'll need this info
471 later. Trust me :) */
472 diskPtr->auto_configured = 1;
473 diskPtr->dev = ac->dev;
474
475 /*
476 * we allow the user to specify that
477 * only a fraction of the disks should
478 * be used this is just for debug: it
479 * speeds up the parity scan
480 */
481
482 diskPtr->numBlocks = diskPtr->numBlocks *
483 rf_sizePercentage / 100;
484
485 /* XXX these will get set multiple times,
486 but since we're autoconfiguring, they'd
487 better be always the same each time!
488 If not, this is the least of your worries */
489
490 bs = diskPtr->blockSize;
491 min_numblks = diskPtr->numBlocks;
492
493 /* this gets done multiple times, but that's
494 fine -- the serial number will be the same
495 for all components, guaranteed */
496 raidPtr->serial_number = ac->clabel->serial_number;
497 /* check the last time the label was modified */
498
499 if (ac->clabel->mod_counter != mod_counter) {
500 /* Even though we've filled in all of
501 the above, we don't trust this
502 component since it's modification
503 counter is not in sync with the
504 rest, and we really consider it to
505 be failed. */
506 disks[c].status = rf_ds_failed;
507 numFailuresThisRow++;
508 } else {
509 if (ac->clabel->clean != RF_RAID_CLEAN) {
510 parity_good = RF_RAID_DIRTY;
511 }
512 }
513 } else {
514 /* Didn't find it at all!! Component must
515 really be dead */
516 disks[c].status = rf_ds_failed;
517 snprintf(disks[c].devname, sizeof(disks[c].devname),
518 "component%d", c);
519 numFailuresThisRow++;
520 }
521 }
522 /* XXX fix for n-fault tolerant */
523 /* XXX this should probably check to see how many failures
524 we can handle for this configuration! */
525 if (numFailuresThisRow > 0) {
526 raidPtr->status = rf_rs_degraded;
527 raidPtr->numFailures = numFailuresThisRow;
528 }
529
530 /* close the device for the ones that didn't get used */
531
532 ac = auto_config;
533 while(ac!=NULL) {
534 if (ac->flag == 0) {
535 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
536 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
537 vput(ac->vp);
538 ac->vp = NULL;
539 #if DEBUG
540 printf("Released %s from auto-config set.\n",
541 ac->devname);
542 #endif
543 }
544 ac = ac->next;
545 }
546
547 raidPtr->mod_counter = mod_counter;
548
549 /* note the state of the parity, if any */
550 raidPtr->parity_good = parity_good;
551 raidPtr->sectorsPerDisk = min_numblks;
552 raidPtr->logBytesPerSector = ffs(bs) - 1;
553 raidPtr->bytesPerSector = bs;
554 raidPtr->sectorMask = bs - 1;
555 return (0);
556
557 fail:
558
559 rf_UnconfigureVnodes( raidPtr );
560
561 return (ret);
562
563 }
564
565 /* configure a single disk in the array */
566 int
567 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
568 RF_RowCol_t col)
569 {
570 char *p;
571 struct vnode *vp;
572 struct vattr va;
573 int error;
574
575 p = rf_find_non_white(bf);
576 if (p[strlen(p) - 1] == '\n') {
577 /* strip off the newline */
578 p[strlen(p) - 1] = '\0';
579 }
580 (void) strcpy(diskPtr->devname, p);
581
582 /* Let's start by claiming the component is fine and well... */
583 diskPtr->status = rf_ds_optimal;
584
585 raidPtr->raid_cinfo[col].ci_vp = NULL;
586 raidPtr->raid_cinfo[col].ci_dev = 0;
587
588 if (!strcmp("absent", diskPtr->devname)) {
589 printf("Ignoring missing component at column %d\n", col);
590 sprintf(diskPtr->devname, "component%d", col);
591 diskPtr->status = rf_ds_failed;
592 return (0);
593 }
594
595 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
596 if (error) {
597 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
598 if (error == ENXIO) {
599 /* the component isn't there... must be dead :-( */
600 diskPtr->status = rf_ds_failed;
601 } else {
602 return (error);
603 }
604 }
605 if (diskPtr->status == rf_ds_optimal) {
606
607 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
608 return (error);
609 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
610 return (error);
611
612 raidPtr->raid_cinfo[col].ci_vp = vp;
613 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
614
615 /* This component was not automatically configured */
616 diskPtr->auto_configured = 0;
617 diskPtr->dev = va.va_rdev;
618
619 /* we allow the user to specify that only a fraction of the
620 * disks should be used this is just for debug: it speeds up
621 * the parity scan */
622 diskPtr->numBlocks = diskPtr->numBlocks *
623 rf_sizePercentage / 100;
624 }
625 return (0);
626 }
627
628 static void
629 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
630 RF_ComponentLabel_t *ci_label)
631 {
632
633 printf("raid%d: Component %s being configured at col: %d\n",
634 raidPtr->raidid, dev_name, column );
635 printf(" Column: %d Num Columns: %d\n",
636 ci_label->column,
637 ci_label->num_columns);
638 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
639 ci_label->version, ci_label->serial_number,
640 ci_label->mod_counter);
641 printf(" Clean: %s Status: %d\n",
642 ci_label->clean ? "Yes" : "No", ci_label->status );
643 }
644
645 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
646 char *dev_name, RF_ComponentLabel_t *ci_label,
647 int serial_number, int mod_counter)
648 {
649 int fatal_error = 0;
650
651 if (serial_number != ci_label->serial_number) {
652 printf("%s has a different serial number: %d %d\n",
653 dev_name, serial_number, ci_label->serial_number);
654 fatal_error = 1;
655 }
656 if (mod_counter != ci_label->mod_counter) {
657 printf("%s has a different modification count: %d %d\n",
658 dev_name, mod_counter, ci_label->mod_counter);
659 }
660
661 if (row != ci_label->row) {
662 printf("Row out of alignment for: %s\n", dev_name);
663 fatal_error = 1;
664 }
665 if (column != ci_label->column) {
666 printf("Column out of alignment for: %s\n", dev_name);
667 fatal_error = 1;
668 }
669 if (raidPtr->numCol != ci_label->num_columns) {
670 printf("Number of columns do not match for: %s\n", dev_name);
671 fatal_error = 1;
672 }
673 if (ci_label->clean == 0) {
674 /* it's not clean, but that's not fatal */
675 printf("%s is not clean!\n", dev_name);
676 }
677 return(fatal_error);
678 }
679
680
681 /*
682
683 rf_CheckLabels() - check all the component labels for consistency.
684 Return an error if there is anything major amiss.
685
686 */
687
688 int
689 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
690 {
691 int c;
692 char *dev_name;
693 RF_ComponentLabel_t *ci_label;
694 int serial_number = 0;
695 int mod_number = 0;
696 int fatal_error = 0;
697 int mod_values[4];
698 int mod_count[4];
699 int ser_values[4];
700 int ser_count[4];
701 int num_ser;
702 int num_mod;
703 int i;
704 int found;
705 int hosed_column;
706 int too_fatal;
707 int parity_good;
708 int force;
709
710 hosed_column = -1;
711 too_fatal = 0;
712 force = cfgPtr->force;
713
714 /*
715 We're going to try to be a little intelligent here. If one
716 component's label is bogus, and we can identify that it's the
717 *only* one that's gone, we'll mark it as "failed" and allow
718 the configuration to proceed. This will be the *only* case
719 that we'll proceed if there would be (otherwise) fatal errors.
720
721 Basically we simply keep a count of how many components had
722 what serial number. If all but one agree, we simply mark
723 the disagreeing component as being failed, and allow
724 things to come up "normally".
725
726 We do this first for serial numbers, and then for "mod_counter".
727
728 */
729
730 num_ser = 0;
731 num_mod = 0;
732
733 for (c = 0; c < raidPtr->numCol; c++) {
734 ci_label = &raidPtr->raid_cinfo[c].ci_label;
735 found=0;
736 for(i=0;i<num_ser;i++) {
737 if (ser_values[i] == ci_label->serial_number) {
738 ser_count[i]++;
739 found=1;
740 break;
741 }
742 }
743 if (!found) {
744 ser_values[num_ser] = ci_label->serial_number;
745 ser_count[num_ser] = 1;
746 num_ser++;
747 if (num_ser>2) {
748 fatal_error = 1;
749 break;
750 }
751 }
752 found=0;
753 for(i=0;i<num_mod;i++) {
754 if (mod_values[i] == ci_label->mod_counter) {
755 mod_count[i]++;
756 found=1;
757 break;
758 }
759 }
760 if (!found) {
761 mod_values[num_mod] = ci_label->mod_counter;
762 mod_count[num_mod] = 1;
763 num_mod++;
764 if (num_mod>2) {
765 fatal_error = 1;
766 break;
767 }
768 }
769 }
770 #if DEBUG
771 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
772 for(i=0;i<num_ser;i++) {
773 printf("%d %d\n", ser_values[i], ser_count[i]);
774 }
775 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
776 for(i=0;i<num_mod;i++) {
777 printf("%d %d\n", mod_values[i], mod_count[i]);
778 }
779 #endif
780 serial_number = ser_values[0];
781 if (num_ser == 2) {
782 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
783 /* Locate the maverick component */
784 if (ser_count[1] > ser_count[0]) {
785 serial_number = ser_values[1];
786 }
787
788 for (c = 0; c < raidPtr->numCol; c++) {
789 ci_label = &raidPtr->raid_cinfo[c].ci_label;
790 if (serial_number != ci_label->serial_number) {
791 hosed_column = c;
792 break;
793 }
794 }
795 printf("Hosed component: %s\n",
796 &cfgPtr->devnames[0][hosed_column][0]);
797 if (!force) {
798 /* we'll fail this component, as if there are
799 other major errors, we arn't forcing things
800 and we'll abort the config anyways */
801 raidPtr->Disks[hosed_column].status
802 = rf_ds_failed;
803 raidPtr->numFailures++;
804 raidPtr->status = rf_rs_degraded;
805 }
806 } else {
807 too_fatal = 1;
808 }
809 if (cfgPtr->parityConfig == '0') {
810 /* We've identified two different serial numbers.
811 RAID 0 can't cope with that, so we'll punt */
812 too_fatal = 1;
813 }
814
815 }
816
817 /* record the serial number for later. If we bail later, setting
818 this doesn't matter, otherwise we've got the best guess at the
819 correct serial number */
820 raidPtr->serial_number = serial_number;
821
822 mod_number = mod_values[0];
823 if (num_mod == 2) {
824 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
825 /* Locate the maverick component */
826 if (mod_count[1] > mod_count[0]) {
827 mod_number = mod_values[1];
828 } else if (mod_count[1] < mod_count[0]) {
829 mod_number = mod_values[0];
830 } else {
831 /* counts of different modification values
832 are the same. Assume greater value is
833 the correct one, all other things
834 considered */
835 if (mod_values[0] > mod_values[1]) {
836 mod_number = mod_values[0];
837 } else {
838 mod_number = mod_values[1];
839 }
840
841 }
842
843 for (c = 0; c < raidPtr->numCol; c++) {
844 ci_label = &raidPtr->raid_cinfo[c].ci_label;
845 if (mod_number != ci_label->mod_counter) {
846 if (hosed_column == c) {
847 /* same one. Can
848 deal with it. */
849 } else {
850 hosed_column = c;
851 if (num_ser != 1) {
852 too_fatal = 1;
853 break;
854 }
855 }
856 }
857 }
858 printf("Hosed component: %s\n",
859 &cfgPtr->devnames[0][hosed_column][0]);
860 if (!force) {
861 /* we'll fail this component, as if there are
862 other major errors, we arn't forcing things
863 and we'll abort the config anyways */
864 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
865 raidPtr->Disks[hosed_column].status
866 = rf_ds_failed;
867 raidPtr->numFailures++;
868 raidPtr->status = rf_rs_degraded;
869 }
870 }
871 } else {
872 too_fatal = 1;
873 }
874 if (cfgPtr->parityConfig == '0') {
875 /* We've identified two different mod counters.
876 RAID 0 can't cope with that, so we'll punt */
877 too_fatal = 1;
878 }
879 }
880
881 raidPtr->mod_counter = mod_number;
882
883 if (too_fatal) {
884 /* we've had both a serial number mismatch, and a mod_counter
885 mismatch -- and they involved two different components!!
886 Bail -- make things fail so that the user must force
887 the issue... */
888 hosed_column = -1;
889 fatal_error = 1;
890 }
891
892 if (num_ser > 2) {
893 printf("raid%d: Too many different serial numbers!\n",
894 raidPtr->raidid);
895 fatal_error = 1;
896 }
897
898 if (num_mod > 2) {
899 printf("raid%d: Too many different mod counters!\n",
900 raidPtr->raidid);
901 fatal_error = 1;
902 }
903
904 /* we start by assuming the parity will be good, and flee from
905 that notion at the slightest sign of trouble */
906
907 parity_good = RF_RAID_CLEAN;
908
909 for (c = 0; c < raidPtr->numCol; c++) {
910 dev_name = &cfgPtr->devnames[0][c][0];
911 ci_label = &raidPtr->raid_cinfo[c].ci_label;
912
913 if (c == hosed_column) {
914 printf("raid%d: Ignoring %s\n",
915 raidPtr->raidid, dev_name);
916 } else {
917 rf_print_label_status( raidPtr, c, dev_name, ci_label);
918 if (rf_check_label_vitals( raidPtr, 0, c,
919 dev_name, ci_label,
920 serial_number,
921 mod_number )) {
922 fatal_error = 1;
923 }
924 if (ci_label->clean != RF_RAID_CLEAN) {
925 parity_good = RF_RAID_DIRTY;
926 }
927 }
928 }
929
930 if (fatal_error) {
931 parity_good = RF_RAID_DIRTY;
932 }
933
934 /* we note the state of the parity */
935 raidPtr->parity_good = parity_good;
936
937 return(fatal_error);
938 }
939
940 int
941 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
942 {
943 RF_RaidDisk_t *disks;
944 RF_DiskQueue_t *spareQueues;
945 int ret;
946 unsigned int bs;
947 int spare_number;
948
949 ret=0;
950
951 if (raidPtr->numSpare >= RF_MAXSPARE) {
952 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
953 return(EINVAL);
954 }
955
956 RF_LOCK_MUTEX(raidPtr->mutex);
957 while (raidPtr->adding_hot_spare==1) {
958 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
959 &(raidPtr->mutex));
960 }
961 raidPtr->adding_hot_spare=1;
962 RF_UNLOCK_MUTEX(raidPtr->mutex);
963
964 /* the beginning of the spares... */
965 disks = &raidPtr->Disks[raidPtr->numCol];
966
967 spare_number = raidPtr->numSpare;
968
969 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
970 &disks[spare_number],
971 raidPtr->numCol + spare_number);
972
973 if (ret)
974 goto fail;
975 if (disks[spare_number].status != rf_ds_optimal) {
976 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
977 sparePtr->component_name);
978 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
979 ret=EINVAL;
980 goto fail;
981 } else {
982 disks[spare_number].status = rf_ds_spare;
983 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
984 spare_number,
985 disks[spare_number].devname,
986 disks[spare_number].numBlocks,
987 disks[spare_number].blockSize,
988 (long int) disks[spare_number].numBlocks *
989 disks[spare_number].blockSize / 1024 / 1024);
990 }
991
992
993 /* check sizes and block sizes on the spare disk */
994 bs = 1 << raidPtr->logBytesPerSector;
995 if (disks[spare_number].blockSize != bs) {
996 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
997 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
998 ret = EINVAL;
999 goto fail;
1000 }
1001 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1002 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1003 disks[spare_number].devname,
1004 disks[spare_number].blockSize,
1005 raidPtr->sectorsPerDisk);
1006 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1007 ret = EINVAL;
1008 goto fail;
1009 } else {
1010 if (disks[spare_number].numBlocks >
1011 raidPtr->sectorsPerDisk) {
1012 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1013 disks[spare_number].devname,
1014 raidPtr->sectorsPerDisk,
1015 disks[spare_number].numBlocks);
1016
1017 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1018 }
1019 }
1020
1021 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1022 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1023 raidPtr->numCol + spare_number,
1024 raidPtr->qType,
1025 raidPtr->sectorsPerDisk,
1026 raidPtr->Disks[raidPtr->numCol +
1027 spare_number].dev,
1028 raidPtr->maxOutstanding,
1029 &raidPtr->shutdownList,
1030 raidPtr->cleanupList);
1031
1032 RF_LOCK_MUTEX(raidPtr->mutex);
1033 raidPtr->numSpare++;
1034 RF_UNLOCK_MUTEX(raidPtr->mutex);
1035
1036 fail:
1037 RF_LOCK_MUTEX(raidPtr->mutex);
1038 raidPtr->adding_hot_spare=0;
1039 wakeup(&(raidPtr->adding_hot_spare));
1040 RF_UNLOCK_MUTEX(raidPtr->mutex);
1041
1042 return(ret);
1043 }
1044
1045 int
1046 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1047 {
1048 int spare_number;
1049
1050
1051 if (raidPtr->numSpare==0) {
1052 printf("No spares to remove!\n");
1053 return(EINVAL);
1054 }
1055
1056 spare_number = sparePtr->column;
1057
1058 return(EINVAL); /* XXX not implemented yet */
1059 #if 0
1060 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1061 return(EINVAL);
1062 }
1063
1064 /* verify that this spare isn't in use... */
1065
1066
1067
1068
1069 /* it's gone.. */
1070
1071 raidPtr->numSpare--;
1072
1073 return(0);
1074 #endif
1075 }
1076
1077
1078 int
1079 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1080 {
1081 RF_RaidDisk_t *disks;
1082
1083 if ((component->column < 0) ||
1084 (component->column >= raidPtr->numCol)) {
1085 return(EINVAL);
1086 }
1087
1088 disks = &raidPtr->Disks[component->column];
1089
1090 /* 1. This component must be marked as 'failed' */
1091
1092 return(EINVAL); /* Not implemented yet. */
1093 }
1094
1095 int
1096 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1097 RF_SingleComponent_t *component)
1098 {
1099
1100 /* Issues here include how to 'move' this in if there is IO
1101 taking place (e.g. component queues and such) */
1102
1103 return(EINVAL); /* Not implemented yet. */
1104 }
1105