rf_disks.c revision 1.69.10.3 1 /* $NetBSD: rf_disks.c,v 1.69.10.3 2010/03/11 15:04:01 yamt Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.69.10.3 2010/03/11 15:04:01 yamt Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 RF_ComponentLabel_t *, int, int );
88
/*
 * Debug printf wrappers; they print only when rf_diskDebug is non-zero.
 *
 * Each body is wrapped in do { } while (0) so that an invocation expands
 * to exactly one statement.  With a bare `if (...) printf(...)' expansion,
 * an `else' following the invocation would silently bind to the macro's
 * hidden `if' instead of the caller's.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91
/**************************************************************************
 *
 * initialize the disks comprising the array
 *
 * We want the spare disks to have regular row,col numbers so that we can
 * easily substitute a spare for a failed disk.  But, the driver code assumes
 * throughout that the array contains numRow by numCol _non-spare_ disks, so
 * it's not clear how to fit in the spares.  This is an unfortunate holdover
 * from raidSim.  The quick and dirty fix is to make row zero bigger than the
 * rest, and put all the spares in it.  This probably needs to get changed
 * eventually.
 *
 **************************************************************************/
105
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 RF_Config_t *cfgPtr)
109 {
110 RF_RaidDisk_t *disks;
111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 RF_RowCol_t c;
113 int bs, ret;
114 unsigned i, count, foundone = 0, numFailuresThisRow;
115 int force;
116
117 force = cfgPtr->force;
118
119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 if (ret)
121 goto fail;
122
123 disks = raidPtr->Disks;
124
125 numFailuresThisRow = 0;
126 for (c = 0; c < raidPtr->numCol; c++) {
127 ret = rf_ConfigureDisk(raidPtr,
128 &cfgPtr->devnames[0][c][0],
129 &disks[c], c);
130
131 if (ret)
132 goto fail;
133
134 if (disks[c].status == rf_ds_optimal) {
135 ret = raidfetch_component_label(raidPtr, c);
136 if (ret)
137 goto fail;
138 }
139
140 if (disks[c].status != rf_ds_optimal) {
141 numFailuresThisRow++;
142 } else {
143 if (disks[c].numBlocks < min_numblks)
144 min_numblks = disks[c].numBlocks;
145 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
146 c, disks[c].devname,
147 disks[c].numBlocks,
148 disks[c].blockSize,
149 (long int) disks[c].numBlocks *
150 disks[c].blockSize / 1024 / 1024);
151 }
152 }
153 /* XXX fix for n-fault tolerant */
154 /* XXX this should probably check to see how many failures
155 we can handle for this configuration! */
156 if (numFailuresThisRow > 0)
157 raidPtr->status = rf_rs_degraded;
158
159 /* all disks must be the same size & have the same block size, bs must
160 * be a power of 2 */
161 bs = 0;
162 foundone = 0;
163 for (c = 0; c < raidPtr->numCol; c++) {
164 if (disks[c].status == rf_ds_optimal) {
165 bs = disks[c].blockSize;
166 foundone = 1;
167 break;
168 }
169 }
170 if (!foundone) {
171 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
172 ret = EINVAL;
173 goto fail;
174 }
175 for (count = 0, i = 1; i; i <<= 1)
176 if (bs & i)
177 count++;
178 if (count != 1) {
179 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
180 ret = EINVAL;
181 goto fail;
182 }
183
184 if (rf_CheckLabels( raidPtr, cfgPtr )) {
185 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
186 if (force != 0) {
187 printf("raid%d: Fatal errors being ignored.\n",
188 raidPtr->raidid);
189 } else {
190 ret = EINVAL;
191 goto fail;
192 }
193 }
194
195 for (c = 0; c < raidPtr->numCol; c++) {
196 if (disks[c].status == rf_ds_optimal) {
197 if (disks[c].blockSize != bs) {
198 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
199 ret = EINVAL;
200 goto fail;
201 }
202 if (disks[c].numBlocks != min_numblks) {
203 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
204 c, (int) min_numblks);
205 disks[c].numBlocks = min_numblks;
206 }
207 }
208 }
209
210 raidPtr->sectorsPerDisk = min_numblks;
211 raidPtr->logBytesPerSector = ffs(bs) - 1;
212 raidPtr->bytesPerSector = bs;
213 raidPtr->sectorMask = bs - 1;
214 return (0);
215
216 fail:
217
218 rf_UnconfigureVnodes( raidPtr );
219
220 return (ret);
221 }
222
223
224 /****************************************************************************
225 * set up the data structures describing the spare disks in the array
226 * recall from the above comment that the spare disk descriptors are stored
227 * in row zero, which is specially expanded to hold them.
228 ****************************************************************************/
229 int
230 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
231 RF_Config_t *cfgPtr)
232 {
233 int i, ret;
234 unsigned int bs;
235 RF_RaidDisk_t *disks;
236 int num_spares_done;
237
238 num_spares_done = 0;
239
240 /* The space for the spares should have already been allocated by
241 * ConfigureDisks() */
242
243 disks = &raidPtr->Disks[raidPtr->numCol];
244 for (i = 0; i < raidPtr->numSpare; i++) {
245 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
246 &disks[i], raidPtr->numCol + i);
247 if (ret)
248 goto fail;
249 if (disks[i].status != rf_ds_optimal) {
250 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
251 &cfgPtr->spare_names[i][0]);
252 } else {
253 disks[i].status = rf_ds_spare; /* change status to
254 * spare */
255 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
256 disks[i].devname,
257 disks[i].numBlocks, disks[i].blockSize,
258 (long int) disks[i].numBlocks *
259 disks[i].blockSize / 1024 / 1024);
260 }
261 num_spares_done++;
262 }
263
264 /* check sizes and block sizes on spare disks */
265 bs = 1 << raidPtr->logBytesPerSector;
266 for (i = 0; i < raidPtr->numSpare; i++) {
267 if (disks[i].blockSize != bs) {
268 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
269 ret = EINVAL;
270 goto fail;
271 }
272 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
273 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
274 disks[i].devname, disks[i].blockSize,
275 raidPtr->sectorsPerDisk);
276 ret = EINVAL;
277 goto fail;
278 } else
279 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
280 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
281 disks[i].devname,
282 raidPtr->sectorsPerDisk,
283 disks[i].numBlocks);
284
285 disks[i].numBlocks = raidPtr->sectorsPerDisk;
286 }
287 }
288
289 return (0);
290
291 fail:
292
293 /* Release the hold on the main components. We've failed to allocate
294 * a spare, and since we're failing, we need to free things..
295
296 XXX failing to allocate a spare is *not* that big of a deal...
297 We *can* survive without it, if need be, esp. if we get hot
298 adding working.
299
300 If we don't fail out here, then we need a way to remove this spare...
301 that should be easier to do here than if we are "live"...
302
303 */
304
305 rf_UnconfigureVnodes( raidPtr );
306
307 return (ret);
308 }
309
310 static int
311 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
312 {
313 int ret;
314
315 /* We allocate RF_MAXSPARE on the first row so that we
316 have room to do hot-swapping of spares */
317 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
318 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
319 raidPtr->cleanupList);
320 if (raidPtr->Disks == NULL) {
321 ret = ENOMEM;
322 goto fail;
323 }
324
325 /* get space for device specific stuff.. */
326 RF_MallocAndAdd(raidPtr->raid_cinfo,
327 (raidPtr->numCol + RF_MAXSPARE) *
328 sizeof(struct raidcinfo), (struct raidcinfo *),
329 raidPtr->cleanupList);
330
331 if (raidPtr->raid_cinfo == NULL) {
332 ret = ENOMEM;
333 goto fail;
334 }
335
336 return(0);
337 fail:
338 rf_UnconfigureVnodes( raidPtr );
339
340 return(ret);
341 }
342
343
344 /* configure a single disk during auto-configuration at boot */
345 int
346 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
347 RF_AutoConfig_t *auto_config)
348 {
349 RF_RaidDisk_t *disks;
350 RF_RaidDisk_t *diskPtr;
351 RF_RowCol_t c;
352 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
353 int bs, ret;
354 int numFailuresThisRow;
355 RF_AutoConfig_t *ac;
356 int parity_good;
357 int mod_counter;
358 int mod_counter_found;
359
360 #if DEBUG
361 printf("Starting autoconfiguration of RAID set...\n");
362 #endif
363
364 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
365 if (ret)
366 goto fail;
367
368 disks = raidPtr->Disks;
369
370 /* assume the parity will be fine.. */
371 parity_good = RF_RAID_CLEAN;
372
373 /* Check for mod_counters that are too low */
374 mod_counter_found = 0;
375 mod_counter = 0;
376 ac = auto_config;
377 while(ac!=NULL) {
378 if (mod_counter_found==0) {
379 mod_counter = ac->clabel->mod_counter;
380 mod_counter_found = 1;
381 } else {
382 if (ac->clabel->mod_counter > mod_counter) {
383 mod_counter = ac->clabel->mod_counter;
384 }
385 }
386 ac->flag = 0; /* clear the general purpose flag */
387 ac = ac->next;
388 }
389
390 bs = 0;
391
392 numFailuresThisRow = 0;
393 for (c = 0; c < raidPtr->numCol; c++) {
394 diskPtr = &disks[c];
395
396 /* find this row/col in the autoconfig */
397 #if DEBUG
398 printf("Looking for %d in autoconfig\n",c);
399 #endif
400 ac = auto_config;
401 while(ac!=NULL) {
402 if (ac->clabel==NULL) {
403 /* big-time bad news. */
404 goto fail;
405 }
406 if ((ac->clabel->column == c) &&
407 (ac->clabel->mod_counter == mod_counter)) {
408 /* it's this one... */
409 /* flag it as 'used', so we don't
410 free it later. */
411 ac->flag = 1;
412 #if DEBUG
413 printf("Found: %s at %d\n",
414 ac->devname,c);
415 #endif
416
417 break;
418 }
419 ac=ac->next;
420 }
421
422 if (ac==NULL) {
423 /* we didn't find an exact match with a
424 correct mod_counter above... can we find
425 one with an incorrect mod_counter to use
426 instead? (this one, if we find it, will be
427 marked as failed once the set configures)
428 */
429
430 ac = auto_config;
431 while(ac!=NULL) {
432 if (ac->clabel==NULL) {
433 /* big-time bad news. */
434 goto fail;
435 }
436 if (ac->clabel->column == c) {
437 /* it's this one...
438 flag it as 'used', so we
439 don't free it later. */
440 ac->flag = 1;
441 #if DEBUG
442 printf("Found(low mod_counter): %s at %d\n",
443 ac->devname,c);
444 #endif
445
446 break;
447 }
448 ac=ac->next;
449 }
450 }
451
452
453
454 if (ac!=NULL) {
455 /* Found it. Configure it.. */
456 diskPtr->blockSize = ac->clabel->blockSize;
457 diskPtr->numBlocks = ac->clabel->numBlocks;
458 /* Note: rf_protectedSectors is already
459 factored into numBlocks here */
460 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
461 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
462
463 memcpy(raidget_component_label(raidPtr, c),
464 ac->clabel, sizeof(*ac->clabel));
465 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
466 "/dev/%s", ac->devname);
467
468 /* note the fact that this component was
469 autoconfigured. You'll need this info
470 later. Trust me :) */
471 diskPtr->auto_configured = 1;
472 diskPtr->dev = ac->dev;
473
474 /*
475 * we allow the user to specify that
476 * only a fraction of the disks should
477 * be used this is just for debug: it
478 * speeds up the parity scan
479 */
480
481 diskPtr->numBlocks = diskPtr->numBlocks *
482 rf_sizePercentage / 100;
483
484 /* XXX these will get set multiple times,
485 but since we're autoconfiguring, they'd
486 better be always the same each time!
487 If not, this is the least of your worries */
488
489 bs = diskPtr->blockSize;
490 min_numblks = diskPtr->numBlocks;
491
492 /* this gets done multiple times, but that's
493 fine -- the serial number will be the same
494 for all components, guaranteed */
495 raidPtr->serial_number = ac->clabel->serial_number;
496 /* check the last time the label was modified */
497
498 if (ac->clabel->mod_counter != mod_counter) {
499 /* Even though we've filled in all of
500 the above, we don't trust this
501 component since it's modification
502 counter is not in sync with the
503 rest, and we really consider it to
504 be failed. */
505 disks[c].status = rf_ds_failed;
506 numFailuresThisRow++;
507 } else {
508 if (ac->clabel->clean != RF_RAID_CLEAN) {
509 parity_good = RF_RAID_DIRTY;
510 }
511 }
512 } else {
513 /* Didn't find it at all!! Component must
514 really be dead */
515 disks[c].status = rf_ds_failed;
516 snprintf(disks[c].devname, sizeof(disks[c].devname),
517 "component%d", c);
518 numFailuresThisRow++;
519 }
520 }
521 /* XXX fix for n-fault tolerant */
522 /* XXX this should probably check to see how many failures
523 we can handle for this configuration! */
524 if (numFailuresThisRow > 0) {
525 raidPtr->status = rf_rs_degraded;
526 raidPtr->numFailures = numFailuresThisRow;
527 }
528
529 /* close the device for the ones that didn't get used */
530
531 ac = auto_config;
532 while(ac!=NULL) {
533 if (ac->flag == 0) {
534 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
535 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
536 vput(ac->vp);
537 ac->vp = NULL;
538 #if DEBUG
539 printf("Released %s from auto-config set.\n",
540 ac->devname);
541 #endif
542 }
543 ac = ac->next;
544 }
545
546 raidPtr->mod_counter = mod_counter;
547
548 /* note the state of the parity, if any */
549 raidPtr->parity_good = parity_good;
550 raidPtr->sectorsPerDisk = min_numblks;
551 raidPtr->logBytesPerSector = ffs(bs) - 1;
552 raidPtr->bytesPerSector = bs;
553 raidPtr->sectorMask = bs - 1;
554 return (0);
555
556 fail:
557
558 rf_UnconfigureVnodes( raidPtr );
559
560 return (ret);
561
562 }
563
564 /* configure a single disk in the array */
565 int
566 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
567 RF_RowCol_t col)
568 {
569 char *p;
570 struct vnode *vp;
571 struct vattr va;
572 int error;
573
574 p = rf_find_non_white(bf);
575 if (p[strlen(p) - 1] == '\n') {
576 /* strip off the newline */
577 p[strlen(p) - 1] = '\0';
578 }
579 (void) strcpy(diskPtr->devname, p);
580
581 /* Let's start by claiming the component is fine and well... */
582 diskPtr->status = rf_ds_optimal;
583
584 raidPtr->raid_cinfo[col].ci_vp = NULL;
585 raidPtr->raid_cinfo[col].ci_dev = 0;
586
587 if (!strcmp("absent", diskPtr->devname)) {
588 printf("Ignoring missing component at column %d\n", col);
589 sprintf(diskPtr->devname, "component%d", col);
590 diskPtr->status = rf_ds_failed;
591 return (0);
592 }
593
594 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
595 if (error) {
596 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
597 if (error == ENXIO) {
598 /* the component isn't there... must be dead :-( */
599 diskPtr->status = rf_ds_failed;
600 } else {
601 return (error);
602 }
603 }
604 if (diskPtr->status == rf_ds_optimal) {
605
606 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
607 return (error);
608 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
609 return (error);
610
611 raidPtr->raid_cinfo[col].ci_vp = vp;
612 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
613
614 /* This component was not automatically configured */
615 diskPtr->auto_configured = 0;
616 diskPtr->dev = va.va_rdev;
617
618 /* we allow the user to specify that only a fraction of the
619 * disks should be used this is just for debug: it speeds up
620 * the parity scan */
621 diskPtr->numBlocks = diskPtr->numBlocks *
622 rf_sizePercentage / 100;
623 }
624 return (0);
625 }
626
627 static void
628 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
629 RF_ComponentLabel_t *ci_label)
630 {
631
632 printf("raid%d: Component %s being configured at col: %d\n",
633 raidPtr->raidid, dev_name, column );
634 printf(" Column: %d Num Columns: %d\n",
635 ci_label->column,
636 ci_label->num_columns);
637 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
638 ci_label->version, ci_label->serial_number,
639 ci_label->mod_counter);
640 printf(" Clean: %s Status: %d\n",
641 ci_label->clean ? "Yes" : "No", ci_label->status );
642 }
643
644 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
645 char *dev_name, RF_ComponentLabel_t *ci_label,
646 int serial_number, int mod_counter)
647 {
648 int fatal_error = 0;
649
650 if (serial_number != ci_label->serial_number) {
651 printf("%s has a different serial number: %d %d\n",
652 dev_name, serial_number, ci_label->serial_number);
653 fatal_error = 1;
654 }
655 if (mod_counter != ci_label->mod_counter) {
656 printf("%s has a different modification count: %d %d\n",
657 dev_name, mod_counter, ci_label->mod_counter);
658 }
659
660 if (row != ci_label->row) {
661 printf("Row out of alignment for: %s\n", dev_name);
662 fatal_error = 1;
663 }
664 if (column != ci_label->column) {
665 printf("Column out of alignment for: %s\n", dev_name);
666 fatal_error = 1;
667 }
668 if (raidPtr->numCol != ci_label->num_columns) {
669 printf("Number of columns do not match for: %s\n", dev_name);
670 fatal_error = 1;
671 }
672 if (ci_label->clean == 0) {
673 /* it's not clean, but that's not fatal */
674 printf("%s is not clean!\n", dev_name);
675 }
676 return(fatal_error);
677 }
678
679
680 /*
681
682 rf_CheckLabels() - check all the component labels for consistency.
683 Return an error if there is anything major amiss.
684
685 */
686
687 int
688 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
689 {
690 int c;
691 char *dev_name;
692 RF_ComponentLabel_t *ci_label;
693 int serial_number = 0;
694 int mod_number = 0;
695 int fatal_error = 0;
696 int mod_values[4];
697 int mod_count[4];
698 int ser_values[4];
699 int ser_count[4];
700 int num_ser;
701 int num_mod;
702 int i;
703 int found;
704 int hosed_column;
705 int too_fatal;
706 int parity_good;
707 int force;
708
709 hosed_column = -1;
710 too_fatal = 0;
711 force = cfgPtr->force;
712
713 /*
714 We're going to try to be a little intelligent here. If one
715 component's label is bogus, and we can identify that it's the
716 *only* one that's gone, we'll mark it as "failed" and allow
717 the configuration to proceed. This will be the *only* case
718 that we'll proceed if there would be (otherwise) fatal errors.
719
720 Basically we simply keep a count of how many components had
721 what serial number. If all but one agree, we simply mark
722 the disagreeing component as being failed, and allow
723 things to come up "normally".
724
725 We do this first for serial numbers, and then for "mod_counter".
726
727 */
728
729 num_ser = 0;
730 num_mod = 0;
731
732 for (c = 0; c < raidPtr->numCol; c++) {
733 ci_label = raidget_component_label(raidPtr, c);
734 found=0;
735 for(i=0;i<num_ser;i++) {
736 if (ser_values[i] == ci_label->serial_number) {
737 ser_count[i]++;
738 found=1;
739 break;
740 }
741 }
742 if (!found) {
743 ser_values[num_ser] = ci_label->serial_number;
744 ser_count[num_ser] = 1;
745 num_ser++;
746 if (num_ser>2) {
747 fatal_error = 1;
748 break;
749 }
750 }
751 found=0;
752 for(i=0;i<num_mod;i++) {
753 if (mod_values[i] == ci_label->mod_counter) {
754 mod_count[i]++;
755 found=1;
756 break;
757 }
758 }
759 if (!found) {
760 mod_values[num_mod] = ci_label->mod_counter;
761 mod_count[num_mod] = 1;
762 num_mod++;
763 if (num_mod>2) {
764 fatal_error = 1;
765 break;
766 }
767 }
768 }
769 #if DEBUG
770 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
771 for(i=0;i<num_ser;i++) {
772 printf("%d %d\n", ser_values[i], ser_count[i]);
773 }
774 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
775 for(i=0;i<num_mod;i++) {
776 printf("%d %d\n", mod_values[i], mod_count[i]);
777 }
778 #endif
779 serial_number = ser_values[0];
780 if (num_ser == 2) {
781 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
782 /* Locate the maverick component */
783 if (ser_count[1] > ser_count[0]) {
784 serial_number = ser_values[1];
785 }
786
787 for (c = 0; c < raidPtr->numCol; c++) {
788 ci_label = raidget_component_label(raidPtr, c);
789 if (serial_number != ci_label->serial_number) {
790 hosed_column = c;
791 break;
792 }
793 }
794 printf("Hosed component: %s\n",
795 &cfgPtr->devnames[0][hosed_column][0]);
796 if (!force) {
797 /* we'll fail this component, as if there are
798 other major errors, we arn't forcing things
799 and we'll abort the config anyways */
800 raidPtr->Disks[hosed_column].status
801 = rf_ds_failed;
802 raidPtr->numFailures++;
803 raidPtr->status = rf_rs_degraded;
804 }
805 } else {
806 too_fatal = 1;
807 }
808 if (cfgPtr->parityConfig == '0') {
809 /* We've identified two different serial numbers.
810 RAID 0 can't cope with that, so we'll punt */
811 too_fatal = 1;
812 }
813
814 }
815
816 /* record the serial number for later. If we bail later, setting
817 this doesn't matter, otherwise we've got the best guess at the
818 correct serial number */
819 raidPtr->serial_number = serial_number;
820
821 mod_number = mod_values[0];
822 if (num_mod == 2) {
823 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
824 /* Locate the maverick component */
825 if (mod_count[1] > mod_count[0]) {
826 mod_number = mod_values[1];
827 } else if (mod_count[1] < mod_count[0]) {
828 mod_number = mod_values[0];
829 } else {
830 /* counts of different modification values
831 are the same. Assume greater value is
832 the correct one, all other things
833 considered */
834 if (mod_values[0] > mod_values[1]) {
835 mod_number = mod_values[0];
836 } else {
837 mod_number = mod_values[1];
838 }
839
840 }
841
842 for (c = 0; c < raidPtr->numCol; c++) {
843 ci_label = raidget_component_label(raidPtr, c);
844 if (mod_number != ci_label->mod_counter) {
845 if (hosed_column == c) {
846 /* same one. Can
847 deal with it. */
848 } else {
849 hosed_column = c;
850 if (num_ser != 1) {
851 too_fatal = 1;
852 break;
853 }
854 }
855 }
856 }
857 printf("Hosed component: %s\n",
858 &cfgPtr->devnames[0][hosed_column][0]);
859 if (!force) {
860 /* we'll fail this component, as if there are
861 other major errors, we arn't forcing things
862 and we'll abort the config anyways */
863 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
864 raidPtr->Disks[hosed_column].status
865 = rf_ds_failed;
866 raidPtr->numFailures++;
867 raidPtr->status = rf_rs_degraded;
868 }
869 }
870 } else {
871 too_fatal = 1;
872 }
873 if (cfgPtr->parityConfig == '0') {
874 /* We've identified two different mod counters.
875 RAID 0 can't cope with that, so we'll punt */
876 too_fatal = 1;
877 }
878 }
879
880 raidPtr->mod_counter = mod_number;
881
882 if (too_fatal) {
883 /* we've had both a serial number mismatch, and a mod_counter
884 mismatch -- and they involved two different components!!
885 Bail -- make things fail so that the user must force
886 the issue... */
887 hosed_column = -1;
888 fatal_error = 1;
889 }
890
891 if (num_ser > 2) {
892 printf("raid%d: Too many different serial numbers!\n",
893 raidPtr->raidid);
894 fatal_error = 1;
895 }
896
897 if (num_mod > 2) {
898 printf("raid%d: Too many different mod counters!\n",
899 raidPtr->raidid);
900 fatal_error = 1;
901 }
902
903 /* we start by assuming the parity will be good, and flee from
904 that notion at the slightest sign of trouble */
905
906 parity_good = RF_RAID_CLEAN;
907
908 for (c = 0; c < raidPtr->numCol; c++) {
909 dev_name = &cfgPtr->devnames[0][c][0];
910 ci_label = raidget_component_label(raidPtr, c);
911
912 if (c == hosed_column) {
913 printf("raid%d: Ignoring %s\n",
914 raidPtr->raidid, dev_name);
915 } else {
916 rf_print_label_status( raidPtr, c, dev_name, ci_label);
917 if (rf_check_label_vitals( raidPtr, 0, c,
918 dev_name, ci_label,
919 serial_number,
920 mod_number )) {
921 fatal_error = 1;
922 }
923 if (ci_label->clean != RF_RAID_CLEAN) {
924 parity_good = RF_RAID_DIRTY;
925 }
926 }
927 }
928
929 if (fatal_error) {
930 parity_good = RF_RAID_DIRTY;
931 }
932
933 /* we note the state of the parity */
934 raidPtr->parity_good = parity_good;
935
936 return(fatal_error);
937 }
938
939 int
940 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
941 {
942 RF_RaidDisk_t *disks;
943 RF_DiskQueue_t *spareQueues;
944 int ret;
945 unsigned int bs;
946 int spare_number;
947
948 ret=0;
949
950 if (raidPtr->numSpare >= RF_MAXSPARE) {
951 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
952 return(EINVAL);
953 }
954
955 RF_LOCK_MUTEX(raidPtr->mutex);
956 while (raidPtr->adding_hot_spare==1) {
957 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
958 &(raidPtr->mutex));
959 }
960 raidPtr->adding_hot_spare=1;
961 RF_UNLOCK_MUTEX(raidPtr->mutex);
962
963 /* the beginning of the spares... */
964 disks = &raidPtr->Disks[raidPtr->numCol];
965
966 spare_number = raidPtr->numSpare;
967
968 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
969 &disks[spare_number],
970 raidPtr->numCol + spare_number);
971
972 if (ret)
973 goto fail;
974 if (disks[spare_number].status != rf_ds_optimal) {
975 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
976 sparePtr->component_name);
977 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
978 ret=EINVAL;
979 goto fail;
980 } else {
981 disks[spare_number].status = rf_ds_spare;
982 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
983 spare_number,
984 disks[spare_number].devname,
985 disks[spare_number].numBlocks,
986 disks[spare_number].blockSize,
987 (long int) disks[spare_number].numBlocks *
988 disks[spare_number].blockSize / 1024 / 1024);
989 }
990
991
992 /* check sizes and block sizes on the spare disk */
993 bs = 1 << raidPtr->logBytesPerSector;
994 if (disks[spare_number].blockSize != bs) {
995 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
996 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
997 ret = EINVAL;
998 goto fail;
999 }
1000 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1001 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1002 disks[spare_number].devname,
1003 disks[spare_number].blockSize,
1004 raidPtr->sectorsPerDisk);
1005 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1006 ret = EINVAL;
1007 goto fail;
1008 } else {
1009 if (disks[spare_number].numBlocks >
1010 raidPtr->sectorsPerDisk) {
1011 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1012 disks[spare_number].devname,
1013 raidPtr->sectorsPerDisk,
1014 disks[spare_number].numBlocks);
1015
1016 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1017 }
1018 }
1019
1020 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1021 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1022 raidPtr->numCol + spare_number,
1023 raidPtr->qType,
1024 raidPtr->sectorsPerDisk,
1025 raidPtr->Disks[raidPtr->numCol +
1026 spare_number].dev,
1027 raidPtr->maxOutstanding,
1028 &raidPtr->shutdownList,
1029 raidPtr->cleanupList);
1030
1031 RF_LOCK_MUTEX(raidPtr->mutex);
1032 raidPtr->numSpare++;
1033 RF_UNLOCK_MUTEX(raidPtr->mutex);
1034
1035 fail:
1036 RF_LOCK_MUTEX(raidPtr->mutex);
1037 raidPtr->adding_hot_spare=0;
1038 wakeup(&(raidPtr->adding_hot_spare));
1039 RF_UNLOCK_MUTEX(raidPtr->mutex);
1040
1041 return(ret);
1042 }
1043
1044 int
1045 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1046 {
1047 int spare_number;
1048
1049
1050 if (raidPtr->numSpare==0) {
1051 printf("No spares to remove!\n");
1052 return(EINVAL);
1053 }
1054
1055 spare_number = sparePtr->column;
1056
1057 return(EINVAL); /* XXX not implemented yet */
1058 #if 0
1059 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1060 return(EINVAL);
1061 }
1062
1063 /* verify that this spare isn't in use... */
1064
1065
1066
1067
1068 /* it's gone.. */
1069
1070 raidPtr->numSpare--;
1071
1072 return(0);
1073 #endif
1074 }
1075
1076
1077 int
1078 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1079 {
1080 RF_RaidDisk_t *disks;
1081
1082 if ((component->column < 0) ||
1083 (component->column >= raidPtr->numCol)) {
1084 return(EINVAL);
1085 }
1086
1087 disks = &raidPtr->Disks[component->column];
1088
1089 /* 1. This component must be marked as 'failed' */
1090
1091 return(EINVAL); /* Not implemented yet. */
1092 }
1093
1094 int
1095 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1096 RF_SingleComponent_t *component)
1097 {
1098
1099 /* Issues here include how to 'move' this in if there is IO
1100 taking place (e.g. component queues and such) */
1101
1102 return(EINVAL); /* Not implemented yet. */
1103 }
1104