rf_disks.c revision 1.83.2.3 1 /* $NetBSD: rf_disks.c,v 1.83.2.3 2017/12/03 11:37:31 jdolecek Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.83.2.3 2017/12/03 11:37:31 jdolecek Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/namei.h> /* for pathbuf */
82 #include <sys/kauth.h>
83 #include <sys/atomic.h>
84 #include <sys/disk.h>
85
86 #include <miscfs/specfs/specdev.h> /* for v_rdev */
87
88 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
89 static void rf_print_label_status( RF_Raid_t *, int, char *,
90 RF_ComponentLabel_t *);
91 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
92 RF_ComponentLabel_t *, int, int );
93
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each expands to a single do { } while (0) statement so that
 * it can be used safely in an unbraced if/else without capturing the
 * caller's "else".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
96
97 /**************************************************************************
98 *
99 * initialize the disks comprising the array
100 *
101 * We want the spare disks to have regular row,col numbers so that we can
 * easily substitute a spare for a failed disk. But, the driver code assumes
103 * throughout that the array contains numRow by numCol _non-spare_ disks, so
104 * it's not clear how to fit in the spares. This is an unfortunate holdover
105 * from raidSim. The quick and dirty fix is to make row zero bigger than the
106 * rest, and put all the spares in it. This probably needs to get changed
107 * eventually.
108 *
109 **************************************************************************/
110
111 int
112 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
113 RF_Config_t *cfgPtr)
114 {
115 RF_RaidDisk_t *disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
125 if (ret)
126 goto fail;
127
128 disks = raidPtr->Disks;
129
130 numFailuresThisRow = 0;
131 for (c = 0; c < raidPtr->numCol; c++) {
132 ret = rf_ConfigureDisk(raidPtr,
133 &cfgPtr->devnames[0][c][0],
134 &disks[c], c);
135
136 if (ret)
137 goto fail;
138
139 if (disks[c].status == rf_ds_optimal) {
140 ret = raidfetch_component_label(raidPtr, c);
141 if (ret)
142 goto fail;
143
144 /* mark it as failed if the label looks bogus... */
145 if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
146 disks[c].status = rf_ds_failed;
147 }
148 }
149
150 if (disks[c].status != rf_ds_optimal) {
151 numFailuresThisRow++;
152 } else {
153 if (disks[c].numBlocks < min_numblks)
154 min_numblks = disks[c].numBlocks;
155 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
156 c, disks[c].devname,
157 disks[c].numBlocks,
158 disks[c].blockSize,
159 (long int) disks[c].numBlocks *
160 disks[c].blockSize / 1024 / 1024);
161 }
162 }
163 /* XXX fix for n-fault tolerant */
164 /* XXX this should probably check to see how many failures
165 we can handle for this configuration! */
166 if (numFailuresThisRow > 0)
167 raidPtr->status = rf_rs_degraded;
168
169 /* all disks must be the same size & have the same block size, bs must
170 * be a power of 2 */
171 bs = 0;
172 foundone = 0;
173 for (c = 0; c < raidPtr->numCol; c++) {
174 if (disks[c].status == rf_ds_optimal) {
175 bs = disks[c].blockSize;
176 foundone = 1;
177 break;
178 }
179 }
180 if (!foundone) {
181 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
182 ret = EINVAL;
183 goto fail;
184 }
185 for (count = 0, i = 1; i; i <<= 1)
186 if (bs & i)
187 count++;
188 if (count != 1) {
189 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
190 ret = EINVAL;
191 goto fail;
192 }
193
194 if (rf_CheckLabels( raidPtr, cfgPtr )) {
195 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
196 if (force != 0) {
197 printf("raid%d: Fatal errors being ignored.\n",
198 raidPtr->raidid);
199 } else {
200 ret = EINVAL;
201 goto fail;
202 }
203 }
204
205 for (c = 0; c < raidPtr->numCol; c++) {
206 if (disks[c].status == rf_ds_optimal) {
207 if (disks[c].blockSize != bs) {
208 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
209 ret = EINVAL;
210 goto fail;
211 }
212 if (disks[c].numBlocks != min_numblks) {
213 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
214 c, (int) min_numblks);
215 disks[c].numBlocks = min_numblks;
216 }
217 }
218 }
219
220 raidPtr->sectorsPerDisk = min_numblks;
221 raidPtr->logBytesPerSector = ffs(bs) - 1;
222 raidPtr->bytesPerSector = bs;
223 raidPtr->sectorMask = bs - 1;
224 return (0);
225
226 fail:
227
228 rf_UnconfigureVnodes( raidPtr );
229
230 return (ret);
231 }
232
233
234 /****************************************************************************
235 * set up the data structures describing the spare disks in the array
236 * recall from the above comment that the spare disk descriptors are stored
237 * in row zero, which is specially expanded to hold them.
238 ****************************************************************************/
239 int
240 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
241 RF_Config_t *cfgPtr)
242 {
243 int i, ret;
244 unsigned int bs;
245 RF_RaidDisk_t *disks;
246 int num_spares_done;
247
248 num_spares_done = 0;
249
250 /* The space for the spares should have already been allocated by
251 * ConfigureDisks() */
252
253 disks = &raidPtr->Disks[raidPtr->numCol];
254 for (i = 0; i < raidPtr->numSpare; i++) {
255 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
256 &disks[i], raidPtr->numCol + i);
257 if (ret)
258 goto fail;
259 if (disks[i].status != rf_ds_optimal) {
260 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
261 &cfgPtr->spare_names[i][0]);
262 } else {
263 disks[i].status = rf_ds_spare; /* change status to
264 * spare */
265 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
266 disks[i].devname,
267 disks[i].numBlocks, disks[i].blockSize,
268 (long int) disks[i].numBlocks *
269 disks[i].blockSize / 1024 / 1024);
270 }
271 num_spares_done++;
272 }
273
274 /* check sizes and block sizes on spare disks */
275 bs = 1 << raidPtr->logBytesPerSector;
276 for (i = 0; i < raidPtr->numSpare; i++) {
277 if (disks[i].blockSize != bs) {
278 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
279 ret = EINVAL;
280 goto fail;
281 }
282 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
283 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
284 disks[i].devname, disks[i].blockSize,
285 raidPtr->sectorsPerDisk);
286 ret = EINVAL;
287 goto fail;
288 } else
289 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
290 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
291 disks[i].devname,
292 raidPtr->sectorsPerDisk,
293 disks[i].numBlocks);
294
295 disks[i].numBlocks = raidPtr->sectorsPerDisk;
296 }
297 }
298
299 return (0);
300
301 fail:
302
303 /* Release the hold on the main components. We've failed to allocate
304 * a spare, and since we're failing, we need to free things..
305
306 XXX failing to allocate a spare is *not* that big of a deal...
307 We *can* survive without it, if need be, esp. if we get hot
308 adding working.
309
310 If we don't fail out here, then we need a way to remove this spare...
311 that should be easier to do here than if we are "live"...
312
313 */
314
315 rf_UnconfigureVnodes( raidPtr );
316
317 return (ret);
318 }
319
320 static int
321 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
322 {
323 int ret;
324
325 /* We allocate RF_MAXSPARE on the first row so that we
326 have room to do hot-swapping of spares */
327 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
328 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
329 raidPtr->cleanupList);
330 if (raidPtr->Disks == NULL) {
331 ret = ENOMEM;
332 goto fail;
333 }
334
335 /* get space for device specific stuff.. */
336 RF_MallocAndAdd(raidPtr->raid_cinfo,
337 (raidPtr->numCol + RF_MAXSPARE) *
338 sizeof(struct raidcinfo), (struct raidcinfo *),
339 raidPtr->cleanupList);
340
341 if (raidPtr->raid_cinfo == NULL) {
342 ret = ENOMEM;
343 goto fail;
344 }
345
346 return(0);
347 fail:
348 rf_UnconfigureVnodes( raidPtr );
349
350 return(ret);
351 }
352
353
354 /* configure a single disk during auto-configuration at boot */
355 int
356 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
357 RF_AutoConfig_t *auto_config)
358 {
359 RF_RaidDisk_t *disks;
360 RF_RaidDisk_t *diskPtr;
361 RF_RowCol_t c;
362 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
363 int bs, ret;
364 int numFailuresThisRow;
365 RF_AutoConfig_t *ac;
366 int parity_good;
367 int mod_counter;
368 int mod_counter_found;
369
370 #if DEBUG
371 printf("Starting autoconfiguration of RAID set...\n");
372 #endif
373
374 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
375 if (ret)
376 goto fail;
377
378 disks = raidPtr->Disks;
379
380 /* assume the parity will be fine.. */
381 parity_good = RF_RAID_CLEAN;
382
383 /* Check for mod_counters that are too low */
384 mod_counter_found = 0;
385 mod_counter = 0;
386 ac = auto_config;
387 while(ac!=NULL) {
388 if (mod_counter_found==0) {
389 mod_counter = ac->clabel->mod_counter;
390 mod_counter_found = 1;
391 } else {
392 if (ac->clabel->mod_counter > mod_counter) {
393 mod_counter = ac->clabel->mod_counter;
394 }
395 }
396 ac->flag = 0; /* clear the general purpose flag */
397 ac = ac->next;
398 }
399
400 bs = 0;
401
402 numFailuresThisRow = 0;
403 for (c = 0; c < raidPtr->numCol; c++) {
404 diskPtr = &disks[c];
405
406 /* find this row/col in the autoconfig */
407 #if DEBUG
408 printf("Looking for %d in autoconfig\n",c);
409 #endif
410 ac = auto_config;
411 while(ac!=NULL) {
412 if (ac->clabel==NULL) {
413 /* big-time bad news. */
414 goto fail;
415 }
416 if ((ac->clabel->column == c) &&
417 (ac->clabel->mod_counter == mod_counter)) {
418 /* it's this one... */
419 /* flag it as 'used', so we don't
420 free it later. */
421 ac->flag = 1;
422 #if DEBUG
423 printf("Found: %s at %d\n",
424 ac->devname,c);
425 #endif
426
427 break;
428 }
429 ac=ac->next;
430 }
431
432 if (ac==NULL) {
433 /* we didn't find an exact match with a
434 correct mod_counter above... can we find
435 one with an incorrect mod_counter to use
436 instead? (this one, if we find it, will be
437 marked as failed once the set configures)
438 */
439
440 ac = auto_config;
441 while(ac!=NULL) {
442 if (ac->clabel==NULL) {
443 /* big-time bad news. */
444 goto fail;
445 }
446 if (ac->clabel->column == c) {
447 /* it's this one...
448 flag it as 'used', so we
449 don't free it later. */
450 ac->flag = 1;
451 #if DEBUG
452 printf("Found(low mod_counter): %s at %d\n",
453 ac->devname,c);
454 #endif
455
456 break;
457 }
458 ac=ac->next;
459 }
460 }
461
462
463
464 if (ac!=NULL) {
465 /* Found it. Configure it.. */
466 diskPtr->blockSize = ac->clabel->blockSize;
467 diskPtr->numBlocks =
468 rf_component_label_numblocks(ac->clabel);
469 /* Note: rf_protectedSectors is already
470 factored into numBlocks here */
471 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
472 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
473
474 memcpy(raidget_component_label(raidPtr, c),
475 ac->clabel, sizeof(*ac->clabel));
476 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
477 "/dev/%s", ac->devname);
478
479 /* note the fact that this component was
480 autoconfigured. You'll need this info
481 later. Trust me :) */
482 diskPtr->auto_configured = 1;
483 diskPtr->dev = ac->dev;
484
485 /*
486 * we allow the user to specify that
487 * only a fraction of the disks should
488 * be used this is just for debug: it
489 * speeds up the parity scan
490 */
491
492 diskPtr->numBlocks = diskPtr->numBlocks *
493 rf_sizePercentage / 100;
494
495 /* XXX these will get set multiple times,
496 but since we're autoconfiguring, they'd
497 better be always the same each time!
498 If not, this is the least of your worries */
499
500 bs = diskPtr->blockSize;
501 min_numblks = diskPtr->numBlocks;
502
503 /* this gets done multiple times, but that's
504 fine -- the serial number will be the same
505 for all components, guaranteed */
506 raidPtr->serial_number = ac->clabel->serial_number;
507 /* check the last time the label was modified */
508
509 if (ac->clabel->mod_counter != mod_counter) {
510 /* Even though we've filled in all of
511 the above, we don't trust this
512 component since its modification
513 counter is not in sync with the
514 rest, and we really consider it to
515 be failed. */
516 disks[c].status = rf_ds_failed;
517 numFailuresThisRow++;
518 } else {
519 if (ac->clabel->clean != RF_RAID_CLEAN) {
520 parity_good = RF_RAID_DIRTY;
521 }
522 }
523 } else {
524 /* Didn't find it at all!! Component must
525 really be dead */
526 disks[c].status = rf_ds_failed;
527 snprintf(disks[c].devname, sizeof(disks[c].devname),
528 "component%d", c);
529 numFailuresThisRow++;
530 }
531 }
532 /* XXX fix for n-fault tolerant */
533 /* XXX this should probably check to see how many failures
534 we can handle for this configuration! */
535 if (numFailuresThisRow > 0) {
536 raidPtr->status = rf_rs_degraded;
537 raidPtr->numFailures = numFailuresThisRow;
538 }
539
540 /* close the device for the ones that didn't get used */
541
542 ac = auto_config;
543 while(ac!=NULL) {
544 if (ac->flag == 0) {
545 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
546 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
547 vput(ac->vp);
548 ac->vp = NULL;
549 #if DEBUG
550 printf("Released %s from auto-config set.\n",
551 ac->devname);
552 #endif
553 }
554 ac = ac->next;
555 }
556
557 raidPtr->mod_counter = mod_counter;
558
559 /* note the state of the parity, if any */
560 raidPtr->parity_good = parity_good;
561 raidPtr->sectorsPerDisk = min_numblks;
562 raidPtr->logBytesPerSector = ffs(bs) - 1;
563 raidPtr->bytesPerSector = bs;
564 raidPtr->sectorMask = bs - 1;
565 return (0);
566
567 fail:
568
569 rf_UnconfigureVnodes( raidPtr );
570
571 return (ret);
572
573 }
574
575 /* configure a single disk in the array */
576 int
577 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
578 RF_RowCol_t col)
579 {
580 char *p;
581 struct pathbuf *pb;
582 struct vnode *vp;
583 int error;
584
585 p = rf_find_non_white(bf);
586 if (p[strlen(p) - 1] == '\n') {
587 /* strip off the newline */
588 p[strlen(p) - 1] = '\0';
589 }
590 (void) strcpy(diskPtr->devname, p);
591
592 /* Let's start by claiming the component is fine and well... */
593 diskPtr->status = rf_ds_optimal;
594
595 raidPtr->raid_cinfo[col].ci_vp = NULL;
596 raidPtr->raid_cinfo[col].ci_dev = 0;
597
598 if (!strcmp("absent", diskPtr->devname)) {
599 printf("Ignoring missing component at column %d\n", col);
600 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
601 "component%d", col);
602 diskPtr->status = rf_ds_failed;
603 return (0);
604 }
605
606 pb = pathbuf_create(diskPtr->devname);
607 if (pb == NULL) {
608 printf("pathbuf_create for device: %s failed!\n",
609 diskPtr->devname);
610 return ENOMEM;
611 }
612 error = dk_lookup(pb, curlwp, &vp);
613 pathbuf_destroy(pb);
614 if (error) {
615 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
616 if (error == ENXIO) {
617 /* the component isn't there... must be dead :-( */
618 diskPtr->status = rf_ds_failed;
619 return 0;
620 } else {
621 return (error);
622 }
623 }
624
625 if ((error = rf_getdisksize(vp, diskPtr)) != 0)
626 return (error);
627
628 /*
629 * If this raidPtr's bytesPerSector is zero, fill it in with this
630 * components blockSize. This will give us something to work with
631 * initially, and if it is wrong, we'll get errors later.
632 */
633 if (raidPtr->bytesPerSector == 0)
634 raidPtr->bytesPerSector = diskPtr->blockSize;
635
636 if (diskPtr->status == rf_ds_optimal) {
637 raidPtr->raid_cinfo[col].ci_vp = vp;
638 raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
639
640 /* This component was not automatically configured */
641 diskPtr->auto_configured = 0;
642 diskPtr->dev = vp->v_rdev;
643
644 /* we allow the user to specify that only a fraction of the
645 * disks should be used this is just for debug: it speeds up
646 * the parity scan */
647 diskPtr->numBlocks = diskPtr->numBlocks *
648 rf_sizePercentage / 100;
649 }
650
651 /*
652 * Tell the rest of the kernel to check whether anything's
653 * maximum transfer size has changed -- like, for example,
654 * a filesystem that might be mounted on a set where we're
655 * adding a spare with a smaller maximum transfer size than
656 * the original set members.
657 */
658 atomic_inc_uint(&disk_serial);
659 return (0);
660 }
661
662 static void
663 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
664 RF_ComponentLabel_t *ci_label)
665 {
666
667 printf("raid%d: Component %s being configured at col: %d\n",
668 raidPtr->raidid, dev_name, column );
669 printf(" Column: %d Num Columns: %d\n",
670 ci_label->column,
671 ci_label->num_columns);
672 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
673 ci_label->version, ci_label->serial_number,
674 ci_label->mod_counter);
675 printf(" Clean: %s Status: %d\n",
676 ci_label->clean ? "Yes" : "No", ci_label->status );
677 }
678
679 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
680 char *dev_name, RF_ComponentLabel_t *ci_label,
681 int serial_number, int mod_counter)
682 {
683 int fatal_error = 0;
684
685 if (serial_number != ci_label->serial_number) {
686 printf("%s has a different serial number: %d %d\n",
687 dev_name, serial_number, ci_label->serial_number);
688 fatal_error = 1;
689 }
690 if (mod_counter != ci_label->mod_counter) {
691 printf("%s has a different modification count: %d %d\n",
692 dev_name, mod_counter, ci_label->mod_counter);
693 }
694
695 if (row != ci_label->row) {
696 printf("Row out of alignment for: %s\n", dev_name);
697 fatal_error = 1;
698 }
699 if (column != ci_label->column) {
700 printf("Column out of alignment for: %s\n", dev_name);
701 fatal_error = 1;
702 }
703 if (raidPtr->numCol != ci_label->num_columns) {
704 printf("Number of columns do not match for: %s\n", dev_name);
705 fatal_error = 1;
706 }
707 if (ci_label->clean == 0) {
708 /* it's not clean, but that's not fatal */
709 printf("%s is not clean!\n", dev_name);
710 }
711 return(fatal_error);
712 }
713
714
715 static void
716 rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
717 int again)
718 {
719 printf("Hosed component: %s\n", &cfgPtr->devnames[0][hosed_column][0]);
720 if (!cfgPtr->force)
721 return;
722
723 /* we'll fail this component, as if there are
724 other major errors, we aren't forcing things
725 and we'll abort the config anyways */
726 if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
727 return;
728
729 raidPtr->Disks[hosed_column].status = rf_ds_failed;
730 raidPtr->numFailures++;
731 raidPtr->status = rf_rs_degraded;
732 }
733
734 /*
735
736 rf_CheckLabels() - check all the component labels for consistency.
737 Return an error if there is anything major amiss.
738
739 */
740
741 int
742 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
743 {
744 int c;
745 char *dev_name;
746 RF_ComponentLabel_t *ci_label;
747 int serial_number = 0;
748 int mod_number = 0;
749 int fatal_error = 0;
750 int mod_values[4];
751 int mod_count[4];
752 int ser_values[4];
753 int ser_count[4];
754 int num_ser;
755 int num_mod;
756 int i;
757 int found;
758 int hosed_column;
759 int too_fatal;
760 int parity_good;
761
762 hosed_column = -1;
763 too_fatal = 0;
764
765 /*
766 We're going to try to be a little intelligent here. If one
767 component's label is bogus, and we can identify that it's the
768 *only* one that's gone, we'll mark it as "failed" and allow
769 the configuration to proceed. This will be the *only* case
770 that we'll proceed if there would be (otherwise) fatal errors.
771
772 Basically we simply keep a count of how many components had
773 what serial number. If all but one agree, we simply mark
774 the disagreeing component as being failed, and allow
775 things to come up "normally".
776
777 We do this first for serial numbers, and then for "mod_counter".
778
779 */
780
781 num_ser = 0;
782 num_mod = 0;
783
784 ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
785 ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
786 mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
787 mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
788
789 for (c = 0; c < raidPtr->numCol; c++) {
790 if (raidPtr->Disks[c].status != rf_ds_optimal)
791 continue;
792 ci_label = raidget_component_label(raidPtr, c);
793 found=0;
794 for(i=0;i<num_ser;i++) {
795 if (ser_values[i] == ci_label->serial_number) {
796 ser_count[i]++;
797 found=1;
798 break;
799 }
800 }
801 if (!found) {
802 ser_values[num_ser] = ci_label->serial_number;
803 ser_count[num_ser] = 1;
804 num_ser++;
805 if (num_ser>2) {
806 fatal_error = 1;
807 break;
808 }
809 }
810 found=0;
811 for(i=0;i<num_mod;i++) {
812 if (mod_values[i] == ci_label->mod_counter) {
813 mod_count[i]++;
814 found=1;
815 break;
816 }
817 }
818 if (!found) {
819 mod_values[num_mod] = ci_label->mod_counter;
820 mod_count[num_mod] = 1;
821 num_mod++;
822 if (num_mod>2) {
823 fatal_error = 1;
824 break;
825 }
826 }
827 }
828 #if DEBUG
829 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
830 for(i=0;i<num_ser;i++) {
831 printf("%d %d\n", ser_values[i], ser_count[i]);
832 }
833 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
834 for(i=0;i<num_mod;i++) {
835 printf("%d %d\n", mod_values[i], mod_count[i]);
836 }
837 #endif
838 serial_number = ser_values[0];
839 if (num_ser == 2) {
840 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
841 /* Locate the maverick component */
842 if (ser_count[1] > ser_count[0]) {
843 serial_number = ser_values[1];
844 }
845
846 for (c = 0; c < raidPtr->numCol; c++) {
847 if (raidPtr->Disks[c].status != rf_ds_optimal)
848 continue;
849 ci_label = raidget_component_label(raidPtr, c);
850 if (serial_number != ci_label->serial_number) {
851 hosed_column = c;
852 break;
853 }
854 }
855 if (hosed_column != -1)
856 rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
857 0);
858 } else {
859 too_fatal = 1;
860 }
861 if (cfgPtr->parityConfig == '0') {
862 /* We've identified two different serial numbers.
863 RAID 0 can't cope with that, so we'll punt */
864 too_fatal = 1;
865 }
866
867 }
868
869 /* record the serial number for later. If we bail later, setting
870 this doesn't matter, otherwise we've got the best guess at the
871 correct serial number */
872 raidPtr->serial_number = serial_number;
873
874 mod_number = mod_values[0];
875 if (num_mod == 2) {
876 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
877 /* Locate the maverick component */
878 if (mod_count[1] > mod_count[0]) {
879 mod_number = mod_values[1];
880 } else if (mod_count[1] < mod_count[0]) {
881 mod_number = mod_values[0];
882 } else {
883 /* counts of different modification values
884 are the same. Assume greater value is
885 the correct one, all other things
886 considered */
887 if (mod_values[0] > mod_values[1]) {
888 mod_number = mod_values[0];
889 } else {
890 mod_number = mod_values[1];
891 }
892
893 }
894
895 for (c = 0; c < raidPtr->numCol; c++) {
896 if (raidPtr->Disks[c].status != rf_ds_optimal)
897 continue;
898
899 ci_label = raidget_component_label(raidPtr, c);
900 if (mod_number != ci_label->mod_counter) {
901 if (hosed_column == c) {
902 /* same one. Can
903 deal with it. */
904 } else {
905 hosed_column = c;
906 if (num_ser != 1) {
907 too_fatal = 1;
908 break;
909 }
910 }
911 }
912 }
913 if (hosed_column != -1)
914 rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
915 1);
916 } else {
917 too_fatal = 1;
918 }
919 if (cfgPtr->parityConfig == '0') {
920 /* We've identified two different mod counters.
921 RAID 0 can't cope with that, so we'll punt */
922 too_fatal = 1;
923 }
924 }
925
926 raidPtr->mod_counter = mod_number;
927
928 if (too_fatal) {
929 /* we've had both a serial number mismatch, and a mod_counter
930 mismatch -- and they involved two different components!!
931 Bail -- make things fail so that the user must force
932 the issue... */
933 hosed_column = -1;
934 fatal_error = 1;
935 }
936
937 if (num_ser > 2) {
938 printf("raid%d: Too many different serial numbers!\n",
939 raidPtr->raidid);
940 fatal_error = 1;
941 }
942
943 if (num_mod > 2) {
944 printf("raid%d: Too many different mod counters!\n",
945 raidPtr->raidid);
946 fatal_error = 1;
947 }
948
949 for (c = 0; c < raidPtr->numCol; c++) {
950 if (raidPtr->Disks[c].status != rf_ds_optimal) {
951 hosed_column = c;
952 break;
953 }
954 }
955
956 /* we start by assuming the parity will be good, and flee from
957 that notion at the slightest sign of trouble */
958
959 parity_good = RF_RAID_CLEAN;
960
961 for (c = 0; c < raidPtr->numCol; c++) {
962 dev_name = &cfgPtr->devnames[0][c][0];
963 ci_label = raidget_component_label(raidPtr, c);
964
965 if (c == hosed_column) {
966 printf("raid%d: Ignoring %s\n",
967 raidPtr->raidid, dev_name);
968 } else {
969 rf_print_label_status( raidPtr, c, dev_name, ci_label);
970 if (rf_check_label_vitals( raidPtr, 0, c,
971 dev_name, ci_label,
972 serial_number,
973 mod_number )) {
974 fatal_error = 1;
975 }
976 if (ci_label->clean != RF_RAID_CLEAN) {
977 parity_good = RF_RAID_DIRTY;
978 }
979 }
980 }
981
982 if (fatal_error) {
983 parity_good = RF_RAID_DIRTY;
984 }
985
986 /* we note the state of the parity */
987 raidPtr->parity_good = parity_good;
988
989 return(fatal_error);
990 }
991
992 int
993 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
994 {
995 RF_RaidDisk_t *disks;
996 RF_DiskQueue_t *spareQueues;
997 int ret;
998 unsigned int bs;
999 int spare_number;
1000
1001 ret=0;
1002
1003 if (raidPtr->numSpare >= RF_MAXSPARE) {
1004 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1005 return(EINVAL);
1006 }
1007
1008 rf_lock_mutex2(raidPtr->mutex);
1009 while (raidPtr->adding_hot_spare == 1) {
1010 rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
1011 }
1012 raidPtr->adding_hot_spare = 1;
1013 rf_unlock_mutex2(raidPtr->mutex);
1014
1015 /* the beginning of the spares... */
1016 disks = &raidPtr->Disks[raidPtr->numCol];
1017
1018 spare_number = raidPtr->numSpare;
1019
1020 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1021 &disks[spare_number],
1022 raidPtr->numCol + spare_number);
1023
1024 if (ret)
1025 goto fail;
1026 if (disks[spare_number].status != rf_ds_optimal) {
1027 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1028 sparePtr->component_name);
1029 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1030 ret=EINVAL;
1031 goto fail;
1032 } else {
1033 disks[spare_number].status = rf_ds_spare;
1034 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
1035 spare_number,
1036 disks[spare_number].devname,
1037 disks[spare_number].numBlocks,
1038 disks[spare_number].blockSize,
1039 (long int) disks[spare_number].numBlocks *
1040 disks[spare_number].blockSize / 1024 / 1024);
1041 }
1042
1043
1044 /* check sizes and block sizes on the spare disk */
1045 bs = 1 << raidPtr->logBytesPerSector;
1046 if (disks[spare_number].blockSize != bs) {
1047 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1048 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1049 ret = EINVAL;
1050 goto fail;
1051 }
1052 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1053 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1054 disks[spare_number].devname,
1055 disks[spare_number].blockSize,
1056 raidPtr->sectorsPerDisk);
1057 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1058 ret = EINVAL;
1059 goto fail;
1060 } else {
1061 if (disks[spare_number].numBlocks >
1062 raidPtr->sectorsPerDisk) {
1063 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1064 disks[spare_number].devname,
1065 raidPtr->sectorsPerDisk,
1066 disks[spare_number].numBlocks);
1067
1068 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1069 }
1070 }
1071
1072 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1073 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1074 raidPtr->numCol + spare_number,
1075 raidPtr->qType,
1076 raidPtr->sectorsPerDisk,
1077 raidPtr->Disks[raidPtr->numCol +
1078 spare_number].dev,
1079 raidPtr->maxOutstanding,
1080 &raidPtr->shutdownList,
1081 raidPtr->cleanupList);
1082
1083 rf_lock_mutex2(raidPtr->mutex);
1084 raidPtr->numSpare++;
1085 rf_unlock_mutex2(raidPtr->mutex);
1086
1087 fail:
1088 rf_lock_mutex2(raidPtr->mutex);
1089 raidPtr->adding_hot_spare = 0;
1090 rf_signal_cond2(raidPtr->adding_hot_spare_cv);
1091 rf_unlock_mutex2(raidPtr->mutex);
1092
1093 return(ret);
1094 }
1095
1096 int
1097 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1098 {
1099 #if 0
1100 int spare_number;
1101 #endif
1102
1103 if (raidPtr->numSpare==0) {
1104 printf("No spares to remove!\n");
1105 return(EINVAL);
1106 }
1107
1108 return(EINVAL); /* XXX not implemented yet */
1109 #if 0
1110 spare_number = sparePtr->column;
1111
1112 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1113 return(EINVAL);
1114 }
1115
1116 /* verify that this spare isn't in use... */
1117
1118
1119
1120
1121 /* it's gone.. */
1122
1123 raidPtr->numSpare--;
1124
1125 return(0);
1126 #endif
1127 }
1128
1129
1130 int
1131 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1132 {
1133 #if 0
1134 RF_RaidDisk_t *disks;
1135 #endif
1136
1137 if ((component->column < 0) ||
1138 (component->column >= raidPtr->numCol)) {
1139 return(EINVAL);
1140 }
1141
1142 #if 0
1143 disks = &raidPtr->Disks[component->column];
1144 #endif
1145
1146 /* 1. This component must be marked as 'failed' */
1147
1148 return(EINVAL); /* Not implemented yet. */
1149 }
1150
1151 int
1152 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1153 RF_SingleComponent_t *component)
1154 {
1155
1156 /* Issues here include how to 'move' this in if there is IO
1157 taking place (e.g. component queues and such) */
1158
1159 return(EINVAL); /* Not implemented yet. */
1160 }
1161