rf_disks.c revision 1.72.2.2 1 /* $NetBSD: rf_disks.c,v 1.72.2.2 2010/11/06 08:08:32 uebayasi Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.72.2.2 2010/11/06 08:08:32 uebayasi Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 RF_ComponentLabel_t *, int, int );
88
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * Each expands to a single do { } while (0) statement so that using one
 * as the body of an if cannot capture a following "else".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91
/**************************************************************************
 *
 * initialize the disks comprising the array
 *
 * We want the spare disks to have regular row,col numbers so that we can
 * easily substitute a spare for a failed disk.  But, the driver code assumes
 * throughout that the array contains numRow by numCol _non-spare_ disks, so
 * it's not clear how to fit in the spares.  This is an unfortunate holdover
 * from raidSim.  The quick and dirty fix is to make row zero bigger than the
 * rest, and put all the spares in it.  This probably needs to get changed
 * eventually.
 *
 **************************************************************************/
105
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 RF_Config_t *cfgPtr)
109 {
110 RF_RaidDisk_t *disks;
111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 RF_RowCol_t c;
113 int bs, ret;
114 unsigned i, count, foundone = 0, numFailuresThisRow;
115 int force;
116
117 force = cfgPtr->force;
118
119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 if (ret)
121 goto fail;
122
123 disks = raidPtr->Disks;
124
125 numFailuresThisRow = 0;
126 for (c = 0; c < raidPtr->numCol; c++) {
127 ret = rf_ConfigureDisk(raidPtr,
128 &cfgPtr->devnames[0][c][0],
129 &disks[c], c);
130
131 if (ret)
132 goto fail;
133
134 if (disks[c].status == rf_ds_optimal) {
135 ret = raidfetch_component_label(raidPtr, c);
136 if (ret)
137 goto fail;
138 }
139
140 if (disks[c].status != rf_ds_optimal) {
141 numFailuresThisRow++;
142 } else {
143 if (disks[c].numBlocks < min_numblks)
144 min_numblks = disks[c].numBlocks;
145 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
146 c, disks[c].devname,
147 disks[c].numBlocks,
148 disks[c].blockSize,
149 (long int) disks[c].numBlocks *
150 disks[c].blockSize / 1024 / 1024);
151 }
152 }
153 /* XXX fix for n-fault tolerant */
154 /* XXX this should probably check to see how many failures
155 we can handle for this configuration! */
156 if (numFailuresThisRow > 0)
157 raidPtr->status = rf_rs_degraded;
158
159 /* all disks must be the same size & have the same block size, bs must
160 * be a power of 2 */
161 bs = 0;
162 foundone = 0;
163 for (c = 0; c < raidPtr->numCol; c++) {
164 if (disks[c].status == rf_ds_optimal) {
165 bs = disks[c].blockSize;
166 foundone = 1;
167 break;
168 }
169 }
170 if (!foundone) {
171 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
172 ret = EINVAL;
173 goto fail;
174 }
175 for (count = 0, i = 1; i; i <<= 1)
176 if (bs & i)
177 count++;
178 if (count != 1) {
179 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
180 ret = EINVAL;
181 goto fail;
182 }
183
184 if (rf_CheckLabels( raidPtr, cfgPtr )) {
185 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
186 if (force != 0) {
187 printf("raid%d: Fatal errors being ignored.\n",
188 raidPtr->raidid);
189 } else {
190 ret = EINVAL;
191 goto fail;
192 }
193 }
194
195 for (c = 0; c < raidPtr->numCol; c++) {
196 if (disks[c].status == rf_ds_optimal) {
197 if (disks[c].blockSize != bs) {
198 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
199 ret = EINVAL;
200 goto fail;
201 }
202 if (disks[c].numBlocks != min_numblks) {
203 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
204 c, (int) min_numblks);
205 disks[c].numBlocks = min_numblks;
206 }
207 }
208 }
209
210 raidPtr->sectorsPerDisk = min_numblks;
211 raidPtr->logBytesPerSector = ffs(bs) - 1;
212 raidPtr->bytesPerSector = bs;
213 raidPtr->sectorMask = bs - 1;
214 return (0);
215
216 fail:
217
218 rf_UnconfigureVnodes( raidPtr );
219
220 return (ret);
221 }
222
223
224 /****************************************************************************
225 * set up the data structures describing the spare disks in the array
226 * recall from the above comment that the spare disk descriptors are stored
227 * in row zero, which is specially expanded to hold them.
228 ****************************************************************************/
229 int
230 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
231 RF_Config_t *cfgPtr)
232 {
233 int i, ret;
234 unsigned int bs;
235 RF_RaidDisk_t *disks;
236 int num_spares_done;
237
238 num_spares_done = 0;
239
240 /* The space for the spares should have already been allocated by
241 * ConfigureDisks() */
242
243 disks = &raidPtr->Disks[raidPtr->numCol];
244 for (i = 0; i < raidPtr->numSpare; i++) {
245 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
246 &disks[i], raidPtr->numCol + i);
247 if (ret)
248 goto fail;
249 if (disks[i].status != rf_ds_optimal) {
250 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
251 &cfgPtr->spare_names[i][0]);
252 } else {
253 disks[i].status = rf_ds_spare; /* change status to
254 * spare */
255 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
256 disks[i].devname,
257 disks[i].numBlocks, disks[i].blockSize,
258 (long int) disks[i].numBlocks *
259 disks[i].blockSize / 1024 / 1024);
260 }
261 num_spares_done++;
262 }
263
264 /* check sizes and block sizes on spare disks */
265 bs = 1 << raidPtr->logBytesPerSector;
266 for (i = 0; i < raidPtr->numSpare; i++) {
267 if (disks[i].blockSize != bs) {
268 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
269 ret = EINVAL;
270 goto fail;
271 }
272 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
273 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
274 disks[i].devname, disks[i].blockSize,
275 raidPtr->sectorsPerDisk);
276 ret = EINVAL;
277 goto fail;
278 } else
279 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
280 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
281 disks[i].devname,
282 raidPtr->sectorsPerDisk,
283 disks[i].numBlocks);
284
285 disks[i].numBlocks = raidPtr->sectorsPerDisk;
286 }
287 }
288
289 return (0);
290
291 fail:
292
293 /* Release the hold on the main components. We've failed to allocate
294 * a spare, and since we're failing, we need to free things..
295
296 XXX failing to allocate a spare is *not* that big of a deal...
297 We *can* survive without it, if need be, esp. if we get hot
298 adding working.
299
300 If we don't fail out here, then we need a way to remove this spare...
301 that should be easier to do here than if we are "live"...
302
303 */
304
305 rf_UnconfigureVnodes( raidPtr );
306
307 return (ret);
308 }
309
310 static int
311 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
312 {
313 int ret;
314
315 /* We allocate RF_MAXSPARE on the first row so that we
316 have room to do hot-swapping of spares */
317 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
318 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
319 raidPtr->cleanupList);
320 if (raidPtr->Disks == NULL) {
321 ret = ENOMEM;
322 goto fail;
323 }
324
325 /* get space for device specific stuff.. */
326 RF_MallocAndAdd(raidPtr->raid_cinfo,
327 (raidPtr->numCol + RF_MAXSPARE) *
328 sizeof(struct raidcinfo), (struct raidcinfo *),
329 raidPtr->cleanupList);
330
331 if (raidPtr->raid_cinfo == NULL) {
332 ret = ENOMEM;
333 goto fail;
334 }
335
336 return(0);
337 fail:
338 rf_UnconfigureVnodes( raidPtr );
339
340 return(ret);
341 }
342
343
344 /* configure a single disk during auto-configuration at boot */
345 int
346 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
347 RF_AutoConfig_t *auto_config)
348 {
349 RF_RaidDisk_t *disks;
350 RF_RaidDisk_t *diskPtr;
351 RF_RowCol_t c;
352 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
353 int bs, ret;
354 int numFailuresThisRow;
355 RF_AutoConfig_t *ac;
356 int parity_good;
357 int mod_counter;
358 int mod_counter_found;
359
360 #if DEBUG
361 printf("Starting autoconfiguration of RAID set...\n");
362 #endif
363
364 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
365 if (ret)
366 goto fail;
367
368 disks = raidPtr->Disks;
369
370 /* assume the parity will be fine.. */
371 parity_good = RF_RAID_CLEAN;
372
373 /* Check for mod_counters that are too low */
374 mod_counter_found = 0;
375 mod_counter = 0;
376 ac = auto_config;
377 while(ac!=NULL) {
378 if (mod_counter_found==0) {
379 mod_counter = ac->clabel->mod_counter;
380 mod_counter_found = 1;
381 } else {
382 if (ac->clabel->mod_counter > mod_counter) {
383 mod_counter = ac->clabel->mod_counter;
384 }
385 }
386 ac->flag = 0; /* clear the general purpose flag */
387 ac = ac->next;
388 }
389
390 bs = 0;
391
392 numFailuresThisRow = 0;
393 for (c = 0; c < raidPtr->numCol; c++) {
394 diskPtr = &disks[c];
395
396 /* find this row/col in the autoconfig */
397 #if DEBUG
398 printf("Looking for %d in autoconfig\n",c);
399 #endif
400 ac = auto_config;
401 while(ac!=NULL) {
402 if (ac->clabel==NULL) {
403 /* big-time bad news. */
404 goto fail;
405 }
406 if ((ac->clabel->column == c) &&
407 (ac->clabel->mod_counter == mod_counter)) {
408 /* it's this one... */
409 /* flag it as 'used', so we don't
410 free it later. */
411 ac->flag = 1;
412 #if DEBUG
413 printf("Found: %s at %d\n",
414 ac->devname,c);
415 #endif
416
417 break;
418 }
419 ac=ac->next;
420 }
421
422 if (ac==NULL) {
423 /* we didn't find an exact match with a
424 correct mod_counter above... can we find
425 one with an incorrect mod_counter to use
426 instead? (this one, if we find it, will be
427 marked as failed once the set configures)
428 */
429
430 ac = auto_config;
431 while(ac!=NULL) {
432 if (ac->clabel==NULL) {
433 /* big-time bad news. */
434 goto fail;
435 }
436 if (ac->clabel->column == c) {
437 /* it's this one...
438 flag it as 'used', so we
439 don't free it later. */
440 ac->flag = 1;
441 #if DEBUG
442 printf("Found(low mod_counter): %s at %d\n",
443 ac->devname,c);
444 #endif
445
446 break;
447 }
448 ac=ac->next;
449 }
450 }
451
452
453
454 if (ac!=NULL) {
455 /* Found it. Configure it.. */
456 diskPtr->blockSize = ac->clabel->blockSize;
457 diskPtr->numBlocks = ac->clabel->numBlocks;
458 diskPtr->numBlocks |=
459 (uint64_t)ac->clabel->numBlocksHi << 32;
460 /* Note: rf_protectedSectors is already
461 factored into numBlocks here */
462 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
463 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
464
465 memcpy(raidget_component_label(raidPtr, c),
466 ac->clabel, sizeof(*ac->clabel));
467 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
468 "/dev/%s", ac->devname);
469
470 /* note the fact that this component was
471 autoconfigured. You'll need this info
472 later. Trust me :) */
473 diskPtr->auto_configured = 1;
474 diskPtr->dev = ac->dev;
475
476 /*
477 * we allow the user to specify that
478 * only a fraction of the disks should
479 * be used this is just for debug: it
480 * speeds up the parity scan
481 */
482
483 diskPtr->numBlocks = diskPtr->numBlocks *
484 rf_sizePercentage / 100;
485
486 /* XXX these will get set multiple times,
487 but since we're autoconfiguring, they'd
488 better be always the same each time!
489 If not, this is the least of your worries */
490
491 bs = diskPtr->blockSize;
492 min_numblks = diskPtr->numBlocks;
493
494 /* this gets done multiple times, but that's
495 fine -- the serial number will be the same
496 for all components, guaranteed */
497 raidPtr->serial_number = ac->clabel->serial_number;
498 /* check the last time the label was modified */
499
500 if (ac->clabel->mod_counter != mod_counter) {
501 /* Even though we've filled in all of
502 the above, we don't trust this
503 component since it's modification
504 counter is not in sync with the
505 rest, and we really consider it to
506 be failed. */
507 disks[c].status = rf_ds_failed;
508 numFailuresThisRow++;
509 } else {
510 if (ac->clabel->clean != RF_RAID_CLEAN) {
511 parity_good = RF_RAID_DIRTY;
512 }
513 }
514 } else {
515 /* Didn't find it at all!! Component must
516 really be dead */
517 disks[c].status = rf_ds_failed;
518 snprintf(disks[c].devname, sizeof(disks[c].devname),
519 "component%d", c);
520 numFailuresThisRow++;
521 }
522 }
523 /* XXX fix for n-fault tolerant */
524 /* XXX this should probably check to see how many failures
525 we can handle for this configuration! */
526 if (numFailuresThisRow > 0) {
527 raidPtr->status = rf_rs_degraded;
528 raidPtr->numFailures = numFailuresThisRow;
529 }
530
531 /* close the device for the ones that didn't get used */
532
533 ac = auto_config;
534 while(ac!=NULL) {
535 if (ac->flag == 0) {
536 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
537 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
538 vput(ac->vp);
539 ac->vp = NULL;
540 #if DEBUG
541 printf("Released %s from auto-config set.\n",
542 ac->devname);
543 #endif
544 }
545 ac = ac->next;
546 }
547
548 raidPtr->mod_counter = mod_counter;
549
550 /* note the state of the parity, if any */
551 raidPtr->parity_good = parity_good;
552 raidPtr->sectorsPerDisk = min_numblks;
553 raidPtr->logBytesPerSector = ffs(bs) - 1;
554 raidPtr->bytesPerSector = bs;
555 raidPtr->sectorMask = bs - 1;
556 return (0);
557
558 fail:
559
560 rf_UnconfigureVnodes( raidPtr );
561
562 return (ret);
563
564 }
565
566 /* configure a single disk in the array */
567 int
568 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
569 RF_RowCol_t col)
570 {
571 char *p;
572 struct vnode *vp;
573 struct vattr va;
574 int error;
575
576 p = rf_find_non_white(bf);
577 if (p[strlen(p) - 1] == '\n') {
578 /* strip off the newline */
579 p[strlen(p) - 1] = '\0';
580 }
581 (void) strcpy(diskPtr->devname, p);
582
583 /* Let's start by claiming the component is fine and well... */
584 diskPtr->status = rf_ds_optimal;
585
586 raidPtr->raid_cinfo[col].ci_vp = NULL;
587 raidPtr->raid_cinfo[col].ci_dev = 0;
588
589 if (!strcmp("absent", diskPtr->devname)) {
590 printf("Ignoring missing component at column %d\n", col);
591 sprintf(diskPtr->devname, "component%d", col);
592 diskPtr->status = rf_ds_failed;
593 return (0);
594 }
595
596 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
597 if (error) {
598 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
599 if (error == ENXIO) {
600 /* the component isn't there... must be dead :-( */
601 diskPtr->status = rf_ds_failed;
602 } else {
603 return (error);
604 }
605 }
606 if (diskPtr->status == rf_ds_optimal) {
607
608 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
609 return (error);
610 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
611 return (error);
612
613 raidPtr->raid_cinfo[col].ci_vp = vp;
614 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
615
616 /* This component was not automatically configured */
617 diskPtr->auto_configured = 0;
618 diskPtr->dev = va.va_rdev;
619
620 /* we allow the user to specify that only a fraction of the
621 * disks should be used this is just for debug: it speeds up
622 * the parity scan */
623 diskPtr->numBlocks = diskPtr->numBlocks *
624 rf_sizePercentage / 100;
625 }
626 return (0);
627 }
628
629 static void
630 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
631 RF_ComponentLabel_t *ci_label)
632 {
633
634 printf("raid%d: Component %s being configured at col: %d\n",
635 raidPtr->raidid, dev_name, column );
636 printf(" Column: %d Num Columns: %d\n",
637 ci_label->column,
638 ci_label->num_columns);
639 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
640 ci_label->version, ci_label->serial_number,
641 ci_label->mod_counter);
642 printf(" Clean: %s Status: %d\n",
643 ci_label->clean ? "Yes" : "No", ci_label->status );
644 }
645
646 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
647 char *dev_name, RF_ComponentLabel_t *ci_label,
648 int serial_number, int mod_counter)
649 {
650 int fatal_error = 0;
651
652 if (serial_number != ci_label->serial_number) {
653 printf("%s has a different serial number: %d %d\n",
654 dev_name, serial_number, ci_label->serial_number);
655 fatal_error = 1;
656 }
657 if (mod_counter != ci_label->mod_counter) {
658 printf("%s has a different modification count: %d %d\n",
659 dev_name, mod_counter, ci_label->mod_counter);
660 }
661
662 if (row != ci_label->row) {
663 printf("Row out of alignment for: %s\n", dev_name);
664 fatal_error = 1;
665 }
666 if (column != ci_label->column) {
667 printf("Column out of alignment for: %s\n", dev_name);
668 fatal_error = 1;
669 }
670 if (raidPtr->numCol != ci_label->num_columns) {
671 printf("Number of columns do not match for: %s\n", dev_name);
672 fatal_error = 1;
673 }
674 if (ci_label->clean == 0) {
675 /* it's not clean, but that's not fatal */
676 printf("%s is not clean!\n", dev_name);
677 }
678 return(fatal_error);
679 }
680
681
682 /*
683
684 rf_CheckLabels() - check all the component labels for consistency.
685 Return an error if there is anything major amiss.
686
687 */
688
689 int
690 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
691 {
692 int c;
693 char *dev_name;
694 RF_ComponentLabel_t *ci_label;
695 int serial_number = 0;
696 int mod_number = 0;
697 int fatal_error = 0;
698 int mod_values[4];
699 int mod_count[4];
700 int ser_values[4];
701 int ser_count[4];
702 int num_ser;
703 int num_mod;
704 int i;
705 int found;
706 int hosed_column;
707 int too_fatal;
708 int parity_good;
709 int force;
710
711 hosed_column = -1;
712 too_fatal = 0;
713 force = cfgPtr->force;
714
715 /*
716 We're going to try to be a little intelligent here. If one
717 component's label is bogus, and we can identify that it's the
718 *only* one that's gone, we'll mark it as "failed" and allow
719 the configuration to proceed. This will be the *only* case
720 that we'll proceed if there would be (otherwise) fatal errors.
721
722 Basically we simply keep a count of how many components had
723 what serial number. If all but one agree, we simply mark
724 the disagreeing component as being failed, and allow
725 things to come up "normally".
726
727 We do this first for serial numbers, and then for "mod_counter".
728
729 */
730
731 num_ser = 0;
732 num_mod = 0;
733
734 for (c = 0; c < raidPtr->numCol; c++) {
735 ci_label = raidget_component_label(raidPtr, c);
736 found=0;
737 for(i=0;i<num_ser;i++) {
738 if (ser_values[i] == ci_label->serial_number) {
739 ser_count[i]++;
740 found=1;
741 break;
742 }
743 }
744 if (!found) {
745 ser_values[num_ser] = ci_label->serial_number;
746 ser_count[num_ser] = 1;
747 num_ser++;
748 if (num_ser>2) {
749 fatal_error = 1;
750 break;
751 }
752 }
753 found=0;
754 for(i=0;i<num_mod;i++) {
755 if (mod_values[i] == ci_label->mod_counter) {
756 mod_count[i]++;
757 found=1;
758 break;
759 }
760 }
761 if (!found) {
762 mod_values[num_mod] = ci_label->mod_counter;
763 mod_count[num_mod] = 1;
764 num_mod++;
765 if (num_mod>2) {
766 fatal_error = 1;
767 break;
768 }
769 }
770 }
771 #if DEBUG
772 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
773 for(i=0;i<num_ser;i++) {
774 printf("%d %d\n", ser_values[i], ser_count[i]);
775 }
776 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
777 for(i=0;i<num_mod;i++) {
778 printf("%d %d\n", mod_values[i], mod_count[i]);
779 }
780 #endif
781 serial_number = ser_values[0];
782 if (num_ser == 2) {
783 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
784 /* Locate the maverick component */
785 if (ser_count[1] > ser_count[0]) {
786 serial_number = ser_values[1];
787 }
788
789 for (c = 0; c < raidPtr->numCol; c++) {
790 ci_label = raidget_component_label(raidPtr, c);
791 if (serial_number != ci_label->serial_number) {
792 hosed_column = c;
793 break;
794 }
795 }
796 printf("Hosed component: %s\n",
797 &cfgPtr->devnames[0][hosed_column][0]);
798 if (!force) {
799 /* we'll fail this component, as if there are
800 other major errors, we arn't forcing things
801 and we'll abort the config anyways */
802 raidPtr->Disks[hosed_column].status
803 = rf_ds_failed;
804 raidPtr->numFailures++;
805 raidPtr->status = rf_rs_degraded;
806 }
807 } else {
808 too_fatal = 1;
809 }
810 if (cfgPtr->parityConfig == '0') {
811 /* We've identified two different serial numbers.
812 RAID 0 can't cope with that, so we'll punt */
813 too_fatal = 1;
814 }
815
816 }
817
818 /* record the serial number for later. If we bail later, setting
819 this doesn't matter, otherwise we've got the best guess at the
820 correct serial number */
821 raidPtr->serial_number = serial_number;
822
823 mod_number = mod_values[0];
824 if (num_mod == 2) {
825 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
826 /* Locate the maverick component */
827 if (mod_count[1] > mod_count[0]) {
828 mod_number = mod_values[1];
829 } else if (mod_count[1] < mod_count[0]) {
830 mod_number = mod_values[0];
831 } else {
832 /* counts of different modification values
833 are the same. Assume greater value is
834 the correct one, all other things
835 considered */
836 if (mod_values[0] > mod_values[1]) {
837 mod_number = mod_values[0];
838 } else {
839 mod_number = mod_values[1];
840 }
841
842 }
843
844 for (c = 0; c < raidPtr->numCol; c++) {
845 ci_label = raidget_component_label(raidPtr, c);
846 if (mod_number != ci_label->mod_counter) {
847 if (hosed_column == c) {
848 /* same one. Can
849 deal with it. */
850 } else {
851 hosed_column = c;
852 if (num_ser != 1) {
853 too_fatal = 1;
854 break;
855 }
856 }
857 }
858 }
859 printf("Hosed component: %s\n",
860 &cfgPtr->devnames[0][hosed_column][0]);
861 if (!force) {
862 /* we'll fail this component, as if there are
863 other major errors, we arn't forcing things
864 and we'll abort the config anyways */
865 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
866 raidPtr->Disks[hosed_column].status
867 = rf_ds_failed;
868 raidPtr->numFailures++;
869 raidPtr->status = rf_rs_degraded;
870 }
871 }
872 } else {
873 too_fatal = 1;
874 }
875 if (cfgPtr->parityConfig == '0') {
876 /* We've identified two different mod counters.
877 RAID 0 can't cope with that, so we'll punt */
878 too_fatal = 1;
879 }
880 }
881
882 raidPtr->mod_counter = mod_number;
883
884 if (too_fatal) {
885 /* we've had both a serial number mismatch, and a mod_counter
886 mismatch -- and they involved two different components!!
887 Bail -- make things fail so that the user must force
888 the issue... */
889 hosed_column = -1;
890 fatal_error = 1;
891 }
892
893 if (num_ser > 2) {
894 printf("raid%d: Too many different serial numbers!\n",
895 raidPtr->raidid);
896 fatal_error = 1;
897 }
898
899 if (num_mod > 2) {
900 printf("raid%d: Too many different mod counters!\n",
901 raidPtr->raidid);
902 fatal_error = 1;
903 }
904
905 /* we start by assuming the parity will be good, and flee from
906 that notion at the slightest sign of trouble */
907
908 parity_good = RF_RAID_CLEAN;
909
910 for (c = 0; c < raidPtr->numCol; c++) {
911 dev_name = &cfgPtr->devnames[0][c][0];
912 ci_label = raidget_component_label(raidPtr, c);
913
914 if (c == hosed_column) {
915 printf("raid%d: Ignoring %s\n",
916 raidPtr->raidid, dev_name);
917 } else {
918 rf_print_label_status( raidPtr, c, dev_name, ci_label);
919 if (rf_check_label_vitals( raidPtr, 0, c,
920 dev_name, ci_label,
921 serial_number,
922 mod_number )) {
923 fatal_error = 1;
924 }
925 if (ci_label->clean != RF_RAID_CLEAN) {
926 parity_good = RF_RAID_DIRTY;
927 }
928 }
929 }
930
931 if (fatal_error) {
932 parity_good = RF_RAID_DIRTY;
933 }
934
935 /* we note the state of the parity */
936 raidPtr->parity_good = parity_good;
937
938 return(fatal_error);
939 }
940
941 int
942 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
943 {
944 RF_RaidDisk_t *disks;
945 RF_DiskQueue_t *spareQueues;
946 int ret;
947 unsigned int bs;
948 int spare_number;
949
950 ret=0;
951
952 if (raidPtr->numSpare >= RF_MAXSPARE) {
953 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
954 return(EINVAL);
955 }
956
957 RF_LOCK_MUTEX(raidPtr->mutex);
958 while (raidPtr->adding_hot_spare==1) {
959 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
960 &(raidPtr->mutex));
961 }
962 raidPtr->adding_hot_spare=1;
963 RF_UNLOCK_MUTEX(raidPtr->mutex);
964
965 /* the beginning of the spares... */
966 disks = &raidPtr->Disks[raidPtr->numCol];
967
968 spare_number = raidPtr->numSpare;
969
970 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
971 &disks[spare_number],
972 raidPtr->numCol + spare_number);
973
974 if (ret)
975 goto fail;
976 if (disks[spare_number].status != rf_ds_optimal) {
977 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
978 sparePtr->component_name);
979 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
980 ret=EINVAL;
981 goto fail;
982 } else {
983 disks[spare_number].status = rf_ds_spare;
984 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
985 spare_number,
986 disks[spare_number].devname,
987 disks[spare_number].numBlocks,
988 disks[spare_number].blockSize,
989 (long int) disks[spare_number].numBlocks *
990 disks[spare_number].blockSize / 1024 / 1024);
991 }
992
993
994 /* check sizes and block sizes on the spare disk */
995 bs = 1 << raidPtr->logBytesPerSector;
996 if (disks[spare_number].blockSize != bs) {
997 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
998 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
999 ret = EINVAL;
1000 goto fail;
1001 }
1002 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1003 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1004 disks[spare_number].devname,
1005 disks[spare_number].blockSize,
1006 raidPtr->sectorsPerDisk);
1007 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1008 ret = EINVAL;
1009 goto fail;
1010 } else {
1011 if (disks[spare_number].numBlocks >
1012 raidPtr->sectorsPerDisk) {
1013 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1014 disks[spare_number].devname,
1015 raidPtr->sectorsPerDisk,
1016 disks[spare_number].numBlocks);
1017
1018 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1019 }
1020 }
1021
1022 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1023 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1024 raidPtr->numCol + spare_number,
1025 raidPtr->qType,
1026 raidPtr->sectorsPerDisk,
1027 raidPtr->Disks[raidPtr->numCol +
1028 spare_number].dev,
1029 raidPtr->maxOutstanding,
1030 &raidPtr->shutdownList,
1031 raidPtr->cleanupList);
1032
1033 RF_LOCK_MUTEX(raidPtr->mutex);
1034 raidPtr->numSpare++;
1035 RF_UNLOCK_MUTEX(raidPtr->mutex);
1036
1037 fail:
1038 RF_LOCK_MUTEX(raidPtr->mutex);
1039 raidPtr->adding_hot_spare=0;
1040 wakeup(&(raidPtr->adding_hot_spare));
1041 RF_UNLOCK_MUTEX(raidPtr->mutex);
1042
1043 return(ret);
1044 }
1045
1046 int
1047 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1048 {
1049 int spare_number;
1050
1051
1052 if (raidPtr->numSpare==0) {
1053 printf("No spares to remove!\n");
1054 return(EINVAL);
1055 }
1056
1057 spare_number = sparePtr->column;
1058
1059 return(EINVAL); /* XXX not implemented yet */
1060 #if 0
1061 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1062 return(EINVAL);
1063 }
1064
1065 /* verify that this spare isn't in use... */
1066
1067
1068
1069
1070 /* it's gone.. */
1071
1072 raidPtr->numSpare--;
1073
1074 return(0);
1075 #endif
1076 }
1077
1078
1079 int
1080 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1081 {
1082 RF_RaidDisk_t *disks;
1083
1084 if ((component->column < 0) ||
1085 (component->column >= raidPtr->numCol)) {
1086 return(EINVAL);
1087 }
1088
1089 disks = &raidPtr->Disks[component->column];
1090
1091 /* 1. This component must be marked as 'failed' */
1092
1093 return(EINVAL); /* Not implemented yet. */
1094 }
1095
1096 int
1097 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1098 RF_SingleComponent_t *component)
1099 {
1100
1101 /* Issues here include how to 'move' this in if there is IO
1102 taking place (e.g. component queues and such) */
1103
1104 return(EINVAL); /* Not implemented yet. */
1105 }
1106