rf_disks.c revision 1.83.2.2 1 /* $NetBSD: rf_disks.c,v 1.83.2.2 2014/08/20 00:03:49 tls Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.83.2.2 2014/08/20 00:03:49 tls Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/namei.h> /* for pathbuf */
82 #include <sys/kauth.h>
83 #include <sys/atomic.h>
84 #include <sys/disk.h>
85
86 #include <miscfs/specfs/specdev.h> /* for v_rdev */
87
88 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
89 static void rf_print_label_status( RF_Raid_t *, int, char *,
90 RF_ComponentLabel_t *);
91 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
92 RF_ComponentLabel_t *, int, int );
93
/*
 * Debug printf wrappers: they print only when rf_diskDebug is non-zero.
 * Each one is wrapped in do { } while (0) so that it expands to exactly one
 * statement and cannot capture an `else' that follows the macro call.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
96
97 /**************************************************************************
98 *
99 * initialize the disks comprising the array
100 *
101 * We want the spare disks to have regular row,col numbers so that we can
102 * easily substitue a spare for a failed disk. But, the driver code assumes
103 * throughout that the array contains numRow by numCol _non-spare_ disks, so
104 * it's not clear how to fit in the spares. This is an unfortunate holdover
105 * from raidSim. The quick and dirty fix is to make row zero bigger than the
106 * rest, and put all the spares in it. This probably needs to get changed
107 * eventually.
108 *
109 **************************************************************************/
110
111 int
112 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
113 RF_Config_t *cfgPtr)
114 {
115 RF_RaidDisk_t *disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
125 if (ret)
126 goto fail;
127
128 disks = raidPtr->Disks;
129
130 numFailuresThisRow = 0;
131 for (c = 0; c < raidPtr->numCol; c++) {
132 ret = rf_ConfigureDisk(raidPtr,
133 &cfgPtr->devnames[0][c][0],
134 &disks[c], c);
135
136 if (ret)
137 goto fail;
138
139 if (disks[c].status == rf_ds_optimal) {
140 ret = raidfetch_component_label(raidPtr, c);
141 if (ret)
142 goto fail;
143
144 /* mark it as failed if the label looks bogus... */
145 if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
146 disks[c].status = rf_ds_failed;
147 }
148 }
149
150 if (disks[c].status != rf_ds_optimal) {
151 numFailuresThisRow++;
152 } else {
153 if (disks[c].numBlocks < min_numblks)
154 min_numblks = disks[c].numBlocks;
155 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
156 c, disks[c].devname,
157 disks[c].numBlocks,
158 disks[c].blockSize,
159 (long int) disks[c].numBlocks *
160 disks[c].blockSize / 1024 / 1024);
161 }
162 }
163 /* XXX fix for n-fault tolerant */
164 /* XXX this should probably check to see how many failures
165 we can handle for this configuration! */
166 if (numFailuresThisRow > 0)
167 raidPtr->status = rf_rs_degraded;
168
169 /* all disks must be the same size & have the same block size, bs must
170 * be a power of 2 */
171 bs = 0;
172 foundone = 0;
173 for (c = 0; c < raidPtr->numCol; c++) {
174 if (disks[c].status == rf_ds_optimal) {
175 bs = disks[c].blockSize;
176 foundone = 1;
177 break;
178 }
179 }
180 if (!foundone) {
181 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
182 ret = EINVAL;
183 goto fail;
184 }
185 for (count = 0, i = 1; i; i <<= 1)
186 if (bs & i)
187 count++;
188 if (count != 1) {
189 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
190 ret = EINVAL;
191 goto fail;
192 }
193
194 if (rf_CheckLabels( raidPtr, cfgPtr )) {
195 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
196 if (force != 0) {
197 printf("raid%d: Fatal errors being ignored.\n",
198 raidPtr->raidid);
199 } else {
200 ret = EINVAL;
201 goto fail;
202 }
203 }
204
205 for (c = 0; c < raidPtr->numCol; c++) {
206 if (disks[c].status == rf_ds_optimal) {
207 if (disks[c].blockSize != bs) {
208 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
209 ret = EINVAL;
210 goto fail;
211 }
212 if (disks[c].numBlocks != min_numblks) {
213 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
214 c, (int) min_numblks);
215 disks[c].numBlocks = min_numblks;
216 }
217 }
218 }
219
220 raidPtr->sectorsPerDisk = min_numblks;
221 raidPtr->logBytesPerSector = ffs(bs) - 1;
222 raidPtr->bytesPerSector = bs;
223 raidPtr->sectorMask = bs - 1;
224 return (0);
225
226 fail:
227
228 rf_UnconfigureVnodes( raidPtr );
229
230 return (ret);
231 }
232
233
234 /****************************************************************************
235 * set up the data structures describing the spare disks in the array
236 * recall from the above comment that the spare disk descriptors are stored
237 * in row zero, which is specially expanded to hold them.
238 ****************************************************************************/
239 int
240 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
241 RF_Config_t *cfgPtr)
242 {
243 int i, ret;
244 unsigned int bs;
245 RF_RaidDisk_t *disks;
246 int num_spares_done;
247
248 num_spares_done = 0;
249
250 /* The space for the spares should have already been allocated by
251 * ConfigureDisks() */
252
253 disks = &raidPtr->Disks[raidPtr->numCol];
254 for (i = 0; i < raidPtr->numSpare; i++) {
255 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
256 &disks[i], raidPtr->numCol + i);
257 if (ret)
258 goto fail;
259 if (disks[i].status != rf_ds_optimal) {
260 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
261 &cfgPtr->spare_names[i][0]);
262 } else {
263 disks[i].status = rf_ds_spare; /* change status to
264 * spare */
265 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
266 disks[i].devname,
267 disks[i].numBlocks, disks[i].blockSize,
268 (long int) disks[i].numBlocks *
269 disks[i].blockSize / 1024 / 1024);
270 }
271 num_spares_done++;
272 }
273
274 /* check sizes and block sizes on spare disks */
275 bs = 1 << raidPtr->logBytesPerSector;
276 for (i = 0; i < raidPtr->numSpare; i++) {
277 if (disks[i].blockSize != bs) {
278 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
279 ret = EINVAL;
280 goto fail;
281 }
282 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
283 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
284 disks[i].devname, disks[i].blockSize,
285 raidPtr->sectorsPerDisk);
286 ret = EINVAL;
287 goto fail;
288 } else
289 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
290 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
291 disks[i].devname,
292 raidPtr->sectorsPerDisk,
293 disks[i].numBlocks);
294
295 disks[i].numBlocks = raidPtr->sectorsPerDisk;
296 }
297 }
298
299 return (0);
300
301 fail:
302
303 /* Release the hold on the main components. We've failed to allocate
304 * a spare, and since we're failing, we need to free things..
305
306 XXX failing to allocate a spare is *not* that big of a deal...
307 We *can* survive without it, if need be, esp. if we get hot
308 adding working.
309
310 If we don't fail out here, then we need a way to remove this spare...
311 that should be easier to do here than if we are "live"...
312
313 */
314
315 rf_UnconfigureVnodes( raidPtr );
316
317 return (ret);
318 }
319
320 static int
321 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
322 {
323 int ret;
324
325 /* We allocate RF_MAXSPARE on the first row so that we
326 have room to do hot-swapping of spares */
327 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
328 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
329 raidPtr->cleanupList);
330 if (raidPtr->Disks == NULL) {
331 ret = ENOMEM;
332 goto fail;
333 }
334
335 /* get space for device specific stuff.. */
336 RF_MallocAndAdd(raidPtr->raid_cinfo,
337 (raidPtr->numCol + RF_MAXSPARE) *
338 sizeof(struct raidcinfo), (struct raidcinfo *),
339 raidPtr->cleanupList);
340
341 if (raidPtr->raid_cinfo == NULL) {
342 ret = ENOMEM;
343 goto fail;
344 }
345
346 return(0);
347 fail:
348 rf_UnconfigureVnodes( raidPtr );
349
350 return(ret);
351 }
352
353
354 /* configure a single disk during auto-configuration at boot */
355 int
356 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
357 RF_AutoConfig_t *auto_config)
358 {
359 RF_RaidDisk_t *disks;
360 RF_RaidDisk_t *diskPtr;
361 RF_RowCol_t c;
362 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
363 int bs, ret;
364 int numFailuresThisRow;
365 RF_AutoConfig_t *ac;
366 int parity_good;
367 int mod_counter;
368 int mod_counter_found;
369
370 #if DEBUG
371 printf("Starting autoconfiguration of RAID set...\n");
372 #endif
373
374 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
375 if (ret)
376 goto fail;
377
378 disks = raidPtr->Disks;
379
380 /* assume the parity will be fine.. */
381 parity_good = RF_RAID_CLEAN;
382
383 /* Check for mod_counters that are too low */
384 mod_counter_found = 0;
385 mod_counter = 0;
386 ac = auto_config;
387 while(ac!=NULL) {
388 if (mod_counter_found==0) {
389 mod_counter = ac->clabel->mod_counter;
390 mod_counter_found = 1;
391 } else {
392 if (ac->clabel->mod_counter > mod_counter) {
393 mod_counter = ac->clabel->mod_counter;
394 }
395 }
396 ac->flag = 0; /* clear the general purpose flag */
397 ac = ac->next;
398 }
399
400 bs = 0;
401
402 numFailuresThisRow = 0;
403 for (c = 0; c < raidPtr->numCol; c++) {
404 diskPtr = &disks[c];
405
406 /* find this row/col in the autoconfig */
407 #if DEBUG
408 printf("Looking for %d in autoconfig\n",c);
409 #endif
410 ac = auto_config;
411 while(ac!=NULL) {
412 if (ac->clabel==NULL) {
413 /* big-time bad news. */
414 goto fail;
415 }
416 if ((ac->clabel->column == c) &&
417 (ac->clabel->mod_counter == mod_counter)) {
418 /* it's this one... */
419 /* flag it as 'used', so we don't
420 free it later. */
421 ac->flag = 1;
422 #if DEBUG
423 printf("Found: %s at %d\n",
424 ac->devname,c);
425 #endif
426
427 break;
428 }
429 ac=ac->next;
430 }
431
432 if (ac==NULL) {
433 /* we didn't find an exact match with a
434 correct mod_counter above... can we find
435 one with an incorrect mod_counter to use
436 instead? (this one, if we find it, will be
437 marked as failed once the set configures)
438 */
439
440 ac = auto_config;
441 while(ac!=NULL) {
442 if (ac->clabel==NULL) {
443 /* big-time bad news. */
444 goto fail;
445 }
446 if (ac->clabel->column == c) {
447 /* it's this one...
448 flag it as 'used', so we
449 don't free it later. */
450 ac->flag = 1;
451 #if DEBUG
452 printf("Found(low mod_counter): %s at %d\n",
453 ac->devname,c);
454 #endif
455
456 break;
457 }
458 ac=ac->next;
459 }
460 }
461
462
463
464 if (ac!=NULL) {
465 /* Found it. Configure it.. */
466 diskPtr->blockSize = ac->clabel->blockSize;
467 diskPtr->numBlocks =
468 rf_component_label_numblocks(ac->clabel);
469 /* Note: rf_protectedSectors is already
470 factored into numBlocks here */
471 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
472 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
473
474 memcpy(raidget_component_label(raidPtr, c),
475 ac->clabel, sizeof(*ac->clabel));
476 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
477 "/dev/%s", ac->devname);
478
479 /* note the fact that this component was
480 autoconfigured. You'll need this info
481 later. Trust me :) */
482 diskPtr->auto_configured = 1;
483 diskPtr->dev = ac->dev;
484
485 /*
486 * we allow the user to specify that
487 * only a fraction of the disks should
488 * be used this is just for debug: it
489 * speeds up the parity scan
490 */
491
492 diskPtr->numBlocks = diskPtr->numBlocks *
493 rf_sizePercentage / 100;
494
495 /* XXX these will get set multiple times,
496 but since we're autoconfiguring, they'd
497 better be always the same each time!
498 If not, this is the least of your worries */
499
500 bs = diskPtr->blockSize;
501 min_numblks = diskPtr->numBlocks;
502
503 /* this gets done multiple times, but that's
504 fine -- the serial number will be the same
505 for all components, guaranteed */
506 raidPtr->serial_number = ac->clabel->serial_number;
507 /* check the last time the label was modified */
508
509 if (ac->clabel->mod_counter != mod_counter) {
510 /* Even though we've filled in all of
511 the above, we don't trust this
512 component since it's modification
513 counter is not in sync with the
514 rest, and we really consider it to
515 be failed. */
516 disks[c].status = rf_ds_failed;
517 numFailuresThisRow++;
518 } else {
519 if (ac->clabel->clean != RF_RAID_CLEAN) {
520 parity_good = RF_RAID_DIRTY;
521 }
522 }
523 } else {
524 /* Didn't find it at all!! Component must
525 really be dead */
526 disks[c].status = rf_ds_failed;
527 snprintf(disks[c].devname, sizeof(disks[c].devname),
528 "component%d", c);
529 numFailuresThisRow++;
530 }
531 }
532 /* XXX fix for n-fault tolerant */
533 /* XXX this should probably check to see how many failures
534 we can handle for this configuration! */
535 if (numFailuresThisRow > 0) {
536 raidPtr->status = rf_rs_degraded;
537 raidPtr->numFailures = numFailuresThisRow;
538 }
539
540 /* close the device for the ones that didn't get used */
541
542 ac = auto_config;
543 while(ac!=NULL) {
544 if (ac->flag == 0) {
545 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
546 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
547 vput(ac->vp);
548 ac->vp = NULL;
549 #if DEBUG
550 printf("Released %s from auto-config set.\n",
551 ac->devname);
552 #endif
553 }
554 ac = ac->next;
555 }
556
557 raidPtr->mod_counter = mod_counter;
558
559 /* note the state of the parity, if any */
560 raidPtr->parity_good = parity_good;
561 raidPtr->sectorsPerDisk = min_numblks;
562 raidPtr->logBytesPerSector = ffs(bs) - 1;
563 raidPtr->bytesPerSector = bs;
564 raidPtr->sectorMask = bs - 1;
565 return (0);
566
567 fail:
568
569 rf_UnconfigureVnodes( raidPtr );
570
571 return (ret);
572
573 }
574
575 /* configure a single disk in the array */
576 int
577 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
578 RF_RowCol_t col)
579 {
580 char *p;
581 struct pathbuf *pb;
582 struct vnode *vp;
583 int error;
584
585 p = rf_find_non_white(bf);
586 if (p[strlen(p) - 1] == '\n') {
587 /* strip off the newline */
588 p[strlen(p) - 1] = '\0';
589 }
590 (void) strcpy(diskPtr->devname, p);
591
592 /* Let's start by claiming the component is fine and well... */
593 diskPtr->status = rf_ds_optimal;
594
595 raidPtr->raid_cinfo[col].ci_vp = NULL;
596 raidPtr->raid_cinfo[col].ci_dev = 0;
597
598 if (!strcmp("absent", diskPtr->devname)) {
599 printf("Ignoring missing component at column %d\n", col);
600 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
601 "component%d", col);
602 diskPtr->status = rf_ds_failed;
603 return (0);
604 }
605
606 pb = pathbuf_create(diskPtr->devname);
607 if (pb == NULL) {
608 printf("pathbuf_create for device: %s failed!\n",
609 diskPtr->devname);
610 return ENOMEM;
611 }
612 error = dk_lookup(pb, curlwp, &vp);
613 pathbuf_destroy(pb);
614 if (error) {
615 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
616 if (error == ENXIO) {
617 /* the component isn't there... must be dead :-( */
618 diskPtr->status = rf_ds_failed;
619 return 0;
620 } else {
621 return (error);
622 }
623 }
624
625 if ((error = rf_getdisksize(vp, diskPtr)) != 0)
626 return (error);
627
628 /*
629 * If this raidPtr's bytesPerSector is zero, fill it in with this
630 * components blockSize. This will give us something to work with
631 * initially, and if it is wrong, we'll get errors later.
632 */
633 if (raidPtr->bytesPerSector == 0)
634 raidPtr->bytesPerSector = diskPtr->blockSize;
635
636 if (diskPtr->status == rf_ds_optimal) {
637 raidPtr->raid_cinfo[col].ci_vp = vp;
638 raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
639
640 /* This component was not automatically configured */
641 diskPtr->auto_configured = 0;
642 diskPtr->dev = vp->v_rdev;
643
644 /* we allow the user to specify that only a fraction of the
645 * disks should be used this is just for debug: it speeds up
646 * the parity scan */
647 diskPtr->numBlocks = diskPtr->numBlocks *
648 rf_sizePercentage / 100;
649 }
650
651 /*
652 * Tell the rest of the kernel to check whether anything's
653 * maximum transfer size has changed -- like, for example,
654 * a filesystem that might be mounted on a set where we're
655 * adding a spare with a smaller maximum transfer size than
656 * the original set members.
657 */
658 atomic_inc_uint(&disk_serial);
659 return (0);
660 }
661
662 static void
663 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
664 RF_ComponentLabel_t *ci_label)
665 {
666
667 printf("raid%d: Component %s being configured at col: %d\n",
668 raidPtr->raidid, dev_name, column );
669 printf(" Column: %d Num Columns: %d\n",
670 ci_label->column,
671 ci_label->num_columns);
672 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
673 ci_label->version, ci_label->serial_number,
674 ci_label->mod_counter);
675 printf(" Clean: %s Status: %d\n",
676 ci_label->clean ? "Yes" : "No", ci_label->status );
677 }
678
679 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
680 char *dev_name, RF_ComponentLabel_t *ci_label,
681 int serial_number, int mod_counter)
682 {
683 int fatal_error = 0;
684
685 if (serial_number != ci_label->serial_number) {
686 printf("%s has a different serial number: %d %d\n",
687 dev_name, serial_number, ci_label->serial_number);
688 fatal_error = 1;
689 }
690 if (mod_counter != ci_label->mod_counter) {
691 printf("%s has a different modification count: %d %d\n",
692 dev_name, mod_counter, ci_label->mod_counter);
693 }
694
695 if (row != ci_label->row) {
696 printf("Row out of alignment for: %s\n", dev_name);
697 fatal_error = 1;
698 }
699 if (column != ci_label->column) {
700 printf("Column out of alignment for: %s\n", dev_name);
701 fatal_error = 1;
702 }
703 if (raidPtr->numCol != ci_label->num_columns) {
704 printf("Number of columns do not match for: %s\n", dev_name);
705 fatal_error = 1;
706 }
707 if (ci_label->clean == 0) {
708 /* it's not clean, but that's not fatal */
709 printf("%s is not clean!\n", dev_name);
710 }
711 return(fatal_error);
712 }
713
714
715 /*
716
717 rf_CheckLabels() - check all the component labels for consistency.
718 Return an error if there is anything major amiss.
719
720 */
721
722 int
723 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
724 {
725 int c;
726 char *dev_name;
727 RF_ComponentLabel_t *ci_label;
728 int serial_number = 0;
729 int mod_number = 0;
730 int fatal_error = 0;
731 int mod_values[4];
732 int mod_count[4];
733 int ser_values[4];
734 int ser_count[4];
735 int num_ser;
736 int num_mod;
737 int i;
738 int found;
739 int hosed_column;
740 int too_fatal;
741 int parity_good;
742 int force;
743
744 hosed_column = -1;
745 too_fatal = 0;
746 force = cfgPtr->force;
747
748 /*
749 We're going to try to be a little intelligent here. If one
750 component's label is bogus, and we can identify that it's the
751 *only* one that's gone, we'll mark it as "failed" and allow
752 the configuration to proceed. This will be the *only* case
753 that we'll proceed if there would be (otherwise) fatal errors.
754
755 Basically we simply keep a count of how many components had
756 what serial number. If all but one agree, we simply mark
757 the disagreeing component as being failed, and allow
758 things to come up "normally".
759
760 We do this first for serial numbers, and then for "mod_counter".
761
762 */
763
764 num_ser = 0;
765 num_mod = 0;
766
767 ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
768 ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
769 mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
770 mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
771
772 for (c = 0; c < raidPtr->numCol; c++) {
773 if (raidPtr->Disks[c].status != rf_ds_optimal)
774 continue;
775 ci_label = raidget_component_label(raidPtr, c);
776 found=0;
777 for(i=0;i<num_ser;i++) {
778 if (ser_values[i] == ci_label->serial_number) {
779 ser_count[i]++;
780 found=1;
781 break;
782 }
783 }
784 if (!found) {
785 ser_values[num_ser] = ci_label->serial_number;
786 ser_count[num_ser] = 1;
787 num_ser++;
788 if (num_ser>2) {
789 fatal_error = 1;
790 break;
791 }
792 }
793 found=0;
794 for(i=0;i<num_mod;i++) {
795 if (mod_values[i] == ci_label->mod_counter) {
796 mod_count[i]++;
797 found=1;
798 break;
799 }
800 }
801 if (!found) {
802 mod_values[num_mod] = ci_label->mod_counter;
803 mod_count[num_mod] = 1;
804 num_mod++;
805 if (num_mod>2) {
806 fatal_error = 1;
807 break;
808 }
809 }
810 }
811 #if DEBUG
812 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
813 for(i=0;i<num_ser;i++) {
814 printf("%d %d\n", ser_values[i], ser_count[i]);
815 }
816 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
817 for(i=0;i<num_mod;i++) {
818 printf("%d %d\n", mod_values[i], mod_count[i]);
819 }
820 #endif
821 serial_number = ser_values[0];
822 if (num_ser == 2) {
823 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
824 /* Locate the maverick component */
825 if (ser_count[1] > ser_count[0]) {
826 serial_number = ser_values[1];
827 }
828
829 for (c = 0; c < raidPtr->numCol; c++) {
830 if (raidPtr->Disks[c].status != rf_ds_optimal)
831 continue;
832 ci_label = raidget_component_label(raidPtr, c);
833 if (serial_number != ci_label->serial_number) {
834 hosed_column = c;
835 break;
836 }
837 }
838 printf("Hosed component: %s\n",
839 &cfgPtr->devnames[0][hosed_column][0]);
840 if (!force) {
841 /* we'll fail this component, as if there are
842 other major errors, we arn't forcing things
843 and we'll abort the config anyways */
844 raidPtr->Disks[hosed_column].status
845 = rf_ds_failed;
846 raidPtr->numFailures++;
847 raidPtr->status = rf_rs_degraded;
848 }
849 } else {
850 too_fatal = 1;
851 }
852 if (cfgPtr->parityConfig == '0') {
853 /* We've identified two different serial numbers.
854 RAID 0 can't cope with that, so we'll punt */
855 too_fatal = 1;
856 }
857
858 }
859
860 /* record the serial number for later. If we bail later, setting
861 this doesn't matter, otherwise we've got the best guess at the
862 correct serial number */
863 raidPtr->serial_number = serial_number;
864
865 mod_number = mod_values[0];
866 if (num_mod == 2) {
867 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
868 /* Locate the maverick component */
869 if (mod_count[1] > mod_count[0]) {
870 mod_number = mod_values[1];
871 } else if (mod_count[1] < mod_count[0]) {
872 mod_number = mod_values[0];
873 } else {
874 /* counts of different modification values
875 are the same. Assume greater value is
876 the correct one, all other things
877 considered */
878 if (mod_values[0] > mod_values[1]) {
879 mod_number = mod_values[0];
880 } else {
881 mod_number = mod_values[1];
882 }
883
884 }
885
886 for (c = 0; c < raidPtr->numCol; c++) {
887 if (raidPtr->Disks[c].status != rf_ds_optimal)
888 continue;
889
890 ci_label = raidget_component_label(raidPtr, c);
891 if (mod_number != ci_label->mod_counter) {
892 if (hosed_column == c) {
893 /* same one. Can
894 deal with it. */
895 } else {
896 hosed_column = c;
897 if (num_ser != 1) {
898 too_fatal = 1;
899 break;
900 }
901 }
902 }
903 }
904 printf("Hosed component: %s\n",
905 &cfgPtr->devnames[0][hosed_column][0]);
906 if (!force) {
907 /* we'll fail this component, as if there are
908 other major errors, we arn't forcing things
909 and we'll abort the config anyways */
910 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
911 raidPtr->Disks[hosed_column].status
912 = rf_ds_failed;
913 raidPtr->numFailures++;
914 raidPtr->status = rf_rs_degraded;
915 }
916 }
917 } else {
918 too_fatal = 1;
919 }
920 if (cfgPtr->parityConfig == '0') {
921 /* We've identified two different mod counters.
922 RAID 0 can't cope with that, so we'll punt */
923 too_fatal = 1;
924 }
925 }
926
927 raidPtr->mod_counter = mod_number;
928
929 if (too_fatal) {
930 /* we've had both a serial number mismatch, and a mod_counter
931 mismatch -- and they involved two different components!!
932 Bail -- make things fail so that the user must force
933 the issue... */
934 hosed_column = -1;
935 fatal_error = 1;
936 }
937
938 if (num_ser > 2) {
939 printf("raid%d: Too many different serial numbers!\n",
940 raidPtr->raidid);
941 fatal_error = 1;
942 }
943
944 if (num_mod > 2) {
945 printf("raid%d: Too many different mod counters!\n",
946 raidPtr->raidid);
947 fatal_error = 1;
948 }
949
950 for (c = 0; c < raidPtr->numCol; c++) {
951 if (raidPtr->Disks[c].status != rf_ds_optimal) {
952 hosed_column = c;
953 break;
954 }
955 }
956
957 /* we start by assuming the parity will be good, and flee from
958 that notion at the slightest sign of trouble */
959
960 parity_good = RF_RAID_CLEAN;
961
962 for (c = 0; c < raidPtr->numCol; c++) {
963 dev_name = &cfgPtr->devnames[0][c][0];
964 ci_label = raidget_component_label(raidPtr, c);
965
966 if (c == hosed_column) {
967 printf("raid%d: Ignoring %s\n",
968 raidPtr->raidid, dev_name);
969 } else {
970 rf_print_label_status( raidPtr, c, dev_name, ci_label);
971 if (rf_check_label_vitals( raidPtr, 0, c,
972 dev_name, ci_label,
973 serial_number,
974 mod_number )) {
975 fatal_error = 1;
976 }
977 if (ci_label->clean != RF_RAID_CLEAN) {
978 parity_good = RF_RAID_DIRTY;
979 }
980 }
981 }
982
983 if (fatal_error) {
984 parity_good = RF_RAID_DIRTY;
985 }
986
987 /* we note the state of the parity */
988 raidPtr->parity_good = parity_good;
989
990 return(fatal_error);
991 }
992
993 int
994 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
995 {
996 RF_RaidDisk_t *disks;
997 RF_DiskQueue_t *spareQueues;
998 int ret;
999 unsigned int bs;
1000 int spare_number;
1001
1002 ret=0;
1003
1004 if (raidPtr->numSpare >= RF_MAXSPARE) {
1005 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1006 return(EINVAL);
1007 }
1008
1009 rf_lock_mutex2(raidPtr->mutex);
1010 while (raidPtr->adding_hot_spare == 1) {
1011 rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
1012 }
1013 raidPtr->adding_hot_spare = 1;
1014 rf_unlock_mutex2(raidPtr->mutex);
1015
1016 /* the beginning of the spares... */
1017 disks = &raidPtr->Disks[raidPtr->numCol];
1018
1019 spare_number = raidPtr->numSpare;
1020
1021 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1022 &disks[spare_number],
1023 raidPtr->numCol + spare_number);
1024
1025 if (ret)
1026 goto fail;
1027 if (disks[spare_number].status != rf_ds_optimal) {
1028 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1029 sparePtr->component_name);
1030 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1031 ret=EINVAL;
1032 goto fail;
1033 } else {
1034 disks[spare_number].status = rf_ds_spare;
1035 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
1036 spare_number,
1037 disks[spare_number].devname,
1038 disks[spare_number].numBlocks,
1039 disks[spare_number].blockSize,
1040 (long int) disks[spare_number].numBlocks *
1041 disks[spare_number].blockSize / 1024 / 1024);
1042 }
1043
1044
1045 /* check sizes and block sizes on the spare disk */
1046 bs = 1 << raidPtr->logBytesPerSector;
1047 if (disks[spare_number].blockSize != bs) {
1048 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1049 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1050 ret = EINVAL;
1051 goto fail;
1052 }
1053 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1054 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1055 disks[spare_number].devname,
1056 disks[spare_number].blockSize,
1057 raidPtr->sectorsPerDisk);
1058 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1059 ret = EINVAL;
1060 goto fail;
1061 } else {
1062 if (disks[spare_number].numBlocks >
1063 raidPtr->sectorsPerDisk) {
1064 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1065 disks[spare_number].devname,
1066 raidPtr->sectorsPerDisk,
1067 disks[spare_number].numBlocks);
1068
1069 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1070 }
1071 }
1072
1073 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1074 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1075 raidPtr->numCol + spare_number,
1076 raidPtr->qType,
1077 raidPtr->sectorsPerDisk,
1078 raidPtr->Disks[raidPtr->numCol +
1079 spare_number].dev,
1080 raidPtr->maxOutstanding,
1081 &raidPtr->shutdownList,
1082 raidPtr->cleanupList);
1083
1084 rf_lock_mutex2(raidPtr->mutex);
1085 raidPtr->numSpare++;
1086 rf_unlock_mutex2(raidPtr->mutex);
1087
1088 fail:
1089 rf_lock_mutex2(raidPtr->mutex);
1090 raidPtr->adding_hot_spare = 0;
1091 rf_signal_cond2(raidPtr->adding_hot_spare_cv);
1092 rf_unlock_mutex2(raidPtr->mutex);
1093
1094 return(ret);
1095 }
1096
1097 int
1098 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1099 {
1100 #if 0
1101 int spare_number;
1102 #endif
1103
1104 if (raidPtr->numSpare==0) {
1105 printf("No spares to remove!\n");
1106 return(EINVAL);
1107 }
1108
1109 return(EINVAL); /* XXX not implemented yet */
1110 #if 0
1111 spare_number = sparePtr->column;
1112
1113 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1114 return(EINVAL);
1115 }
1116
1117 /* verify that this spare isn't in use... */
1118
1119
1120
1121
1122 /* it's gone.. */
1123
1124 raidPtr->numSpare--;
1125
1126 return(0);
1127 #endif
1128 }
1129
1130
1131 int
1132 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1133 {
1134 #if 0
1135 RF_RaidDisk_t *disks;
1136 #endif
1137
1138 if ((component->column < 0) ||
1139 (component->column >= raidPtr->numCol)) {
1140 return(EINVAL);
1141 }
1142
1143 #if 0
1144 disks = &raidPtr->Disks[component->column];
1145 #endif
1146
1147 /* 1. This component must be marked as 'failed' */
1148
1149 return(EINVAL); /* Not implemented yet. */
1150 }
1151
1152 int
1153 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1154 RF_SingleComponent_t *component)
1155 {
1156
1157 /* Issues here include how to 'move' this in if there is IO
1158 taking place (e.g. component queues and such) */
1159
1160 return(EINVAL); /* Not implemented yet. */
1161 }
1162