rf_disks.c revision 1.70.10.1.4.1 1 /* $NetBSD: rf_disks.c,v 1.70.10.1.4.1 2010/04/21 00:27:51 matt Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1995 Carnegie-Mellon University.
33 * All rights reserved.
34 *
35 * Author: Mark Holland
36 *
37 * Permission to use, copy, modify and distribute this software and
38 * its documentation is hereby granted, provided that both the copyright
39 * notice and this permission notice appear in all copies of the
40 * software, derivative works or modified versions, and any portions
41 * thereof, and that both notices appear in supporting documentation.
42 *
43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 *
47 * Carnegie Mellon requests users of this software to return to
48 *
49 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
50 * School of Computer Science
51 * Carnegie Mellon University
52 * Pittsburgh PA 15213-3890
53 *
54 * any improvements or extensions that they make and grant Carnegie the
55 * rights to redistribute these changes.
56 */
57
58 /***************************************************************
59 * rf_disks.c -- code to perform operations on the actual disks
60 ***************************************************************/
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.70.10.1.4.1 2010/04/21 00:27:51 matt Exp $");
64
65 #include <dev/raidframe/raidframevar.h>
66
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 RF_ComponentLabel_t *, int, int );
88
/*
 * Debug printing helpers: forward the arguments to printf() only when
 * rf_diskDebug is non-zero.  The digit is the total number of macro
 * arguments (format string included).
 *
 * The do { } while (0) wrapper makes each use a single statement, so that
 *	if (x) DPRINTF6(...); else ...
 * keeps the `else' attached to the outer `if' rather than to the hidden
 * rf_diskDebug test.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91
92 /**************************************************************************
93 *
94 * initialize the disks comprising the array
95 *
96 * We want the spare disks to have regular row,col numbers so that we can
97 * easily substitue a spare for a failed disk. But, the driver code assumes
98 * throughout that the array contains numRow by numCol _non-spare_ disks, so
99 * it's not clear how to fit in the spares. This is an unfortunate holdover
100 * from raidSim. The quick and dirty fix is to make row zero bigger than the
101 * rest, and put all the spares in it. This probably needs to get changed
102 * eventually.
103 *
104 **************************************************************************/
105
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 RF_Config_t *cfgPtr)
109 {
110 RF_RaidDisk_t *disks;
111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 RF_RowCol_t c;
113 int bs, ret;
114 unsigned i, count, foundone = 0, numFailuresThisRow;
115 int force;
116
117 force = cfgPtr->force;
118
119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 if (ret)
121 goto fail;
122
123 disks = raidPtr->Disks;
124
125 numFailuresThisRow = 0;
126 for (c = 0; c < raidPtr->numCol; c++) {
127 ret = rf_ConfigureDisk(raidPtr,
128 &cfgPtr->devnames[0][c][0],
129 &disks[c], c);
130
131 if (ret)
132 goto fail;
133
134 if (disks[c].status == rf_ds_optimal) {
135 raidfetch_component_label(raidPtr, c);
136 }
137
138 if (disks[c].status != rf_ds_optimal) {
139 numFailuresThisRow++;
140 } else {
141 if (disks[c].numBlocks < min_numblks)
142 min_numblks = disks[c].numBlocks;
143 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
144 c, disks[c].devname,
145 disks[c].numBlocks,
146 disks[c].blockSize,
147 (long int) disks[c].numBlocks *
148 disks[c].blockSize / 1024 / 1024);
149 }
150 }
151 /* XXX fix for n-fault tolerant */
152 /* XXX this should probably check to see how many failures
153 we can handle for this configuration! */
154 if (numFailuresThisRow > 0)
155 raidPtr->status = rf_rs_degraded;
156
157 /* all disks must be the same size & have the same block size, bs must
158 * be a power of 2 */
159 bs = 0;
160 foundone = 0;
161 for (c = 0; c < raidPtr->numCol; c++) {
162 if (disks[c].status == rf_ds_optimal) {
163 bs = disks[c].blockSize;
164 foundone = 1;
165 break;
166 }
167 }
168 if (!foundone) {
169 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
170 ret = EINVAL;
171 goto fail;
172 }
173 for (count = 0, i = 1; i; i <<= 1)
174 if (bs & i)
175 count++;
176 if (count != 1) {
177 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
178 ret = EINVAL;
179 goto fail;
180 }
181
182 if (rf_CheckLabels( raidPtr, cfgPtr )) {
183 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
184 if (force != 0) {
185 printf("raid%d: Fatal errors being ignored.\n",
186 raidPtr->raidid);
187 } else {
188 ret = EINVAL;
189 goto fail;
190 }
191 }
192
193 for (c = 0; c < raidPtr->numCol; c++) {
194 if (disks[c].status == rf_ds_optimal) {
195 if (disks[c].blockSize != bs) {
196 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
197 ret = EINVAL;
198 goto fail;
199 }
200 if (disks[c].numBlocks != min_numblks) {
201 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
202 c, (int) min_numblks);
203 disks[c].numBlocks = min_numblks;
204 }
205 }
206 }
207
208 raidPtr->sectorsPerDisk = min_numblks;
209 raidPtr->logBytesPerSector = ffs(bs) - 1;
210 raidPtr->bytesPerSector = bs;
211 raidPtr->sectorMask = bs - 1;
212 return (0);
213
214 fail:
215
216 rf_UnconfigureVnodes( raidPtr );
217
218 return (ret);
219 }
220
221
222 /****************************************************************************
223 * set up the data structures describing the spare disks in the array
224 * recall from the above comment that the spare disk descriptors are stored
225 * in row zero, which is specially expanded to hold them.
226 ****************************************************************************/
227 int
228 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
229 RF_Config_t *cfgPtr)
230 {
231 int i, ret;
232 unsigned int bs;
233 RF_RaidDisk_t *disks;
234 int num_spares_done;
235
236 num_spares_done = 0;
237
238 /* The space for the spares should have already been allocated by
239 * ConfigureDisks() */
240
241 disks = &raidPtr->Disks[raidPtr->numCol];
242 for (i = 0; i < raidPtr->numSpare; i++) {
243 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
244 &disks[i], raidPtr->numCol + i);
245 if (ret)
246 goto fail;
247 if (disks[i].status != rf_ds_optimal) {
248 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
249 &cfgPtr->spare_names[i][0]);
250 } else {
251 disks[i].status = rf_ds_spare; /* change status to
252 * spare */
253 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
254 disks[i].devname,
255 disks[i].numBlocks, disks[i].blockSize,
256 (long int) disks[i].numBlocks *
257 disks[i].blockSize / 1024 / 1024);
258 }
259 num_spares_done++;
260 }
261
262 /* check sizes and block sizes on spare disks */
263 bs = 1 << raidPtr->logBytesPerSector;
264 for (i = 0; i < raidPtr->numSpare; i++) {
265 if (disks[i].blockSize != bs) {
266 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
267 ret = EINVAL;
268 goto fail;
269 }
270 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
271 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
272 disks[i].devname, disks[i].blockSize,
273 raidPtr->sectorsPerDisk);
274 ret = EINVAL;
275 goto fail;
276 } else
277 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
278 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
279 disks[i].devname,
280 raidPtr->sectorsPerDisk,
281 disks[i].numBlocks);
282
283 disks[i].numBlocks = raidPtr->sectorsPerDisk;
284 }
285 }
286
287 return (0);
288
289 fail:
290
291 /* Release the hold on the main components. We've failed to allocate
292 * a spare, and since we're failing, we need to free things..
293
294 XXX failing to allocate a spare is *not* that big of a deal...
295 We *can* survive without it, if need be, esp. if we get hot
296 adding working.
297
298 If we don't fail out here, then we need a way to remove this spare...
299 that should be easier to do here than if we are "live"...
300
301 */
302
303 rf_UnconfigureVnodes( raidPtr );
304
305 return (ret);
306 }
307
308 static int
309 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
310 {
311 int ret;
312
313 /* We allocate RF_MAXSPARE on the first row so that we
314 have room to do hot-swapping of spares */
315 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
316 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
317 raidPtr->cleanupList);
318 if (raidPtr->Disks == NULL) {
319 ret = ENOMEM;
320 goto fail;
321 }
322
323 /* get space for device specific stuff.. */
324 RF_MallocAndAdd(raidPtr->raid_cinfo,
325 (raidPtr->numCol + RF_MAXSPARE) *
326 sizeof(struct raidcinfo), (struct raidcinfo *),
327 raidPtr->cleanupList);
328
329 if (raidPtr->raid_cinfo == NULL) {
330 ret = ENOMEM;
331 goto fail;
332 }
333
334 return(0);
335 fail:
336 rf_UnconfigureVnodes( raidPtr );
337
338 return(ret);
339 }
340
341
342 /* configure a single disk during auto-configuration at boot */
343 int
344 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
345 RF_AutoConfig_t *auto_config)
346 {
347 RF_RaidDisk_t *disks;
348 RF_RaidDisk_t *diskPtr;
349 RF_RowCol_t c;
350 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
351 int bs, ret;
352 int numFailuresThisRow;
353 RF_AutoConfig_t *ac;
354 int parity_good;
355 int mod_counter;
356 int mod_counter_found;
357
358 #if DEBUG
359 printf("Starting autoconfiguration of RAID set...\n");
360 #endif
361
362 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
363 if (ret)
364 goto fail;
365
366 disks = raidPtr->Disks;
367
368 /* assume the parity will be fine.. */
369 parity_good = RF_RAID_CLEAN;
370
371 /* Check for mod_counters that are too low */
372 mod_counter_found = 0;
373 mod_counter = 0;
374 ac = auto_config;
375 while(ac!=NULL) {
376 if (mod_counter_found==0) {
377 mod_counter = ac->clabel->mod_counter;
378 mod_counter_found = 1;
379 } else {
380 if (ac->clabel->mod_counter > mod_counter) {
381 mod_counter = ac->clabel->mod_counter;
382 }
383 }
384 ac->flag = 0; /* clear the general purpose flag */
385 ac = ac->next;
386 }
387
388 bs = 0;
389
390 numFailuresThisRow = 0;
391 for (c = 0; c < raidPtr->numCol; c++) {
392 diskPtr = &disks[c];
393
394 /* find this row/col in the autoconfig */
395 #if DEBUG
396 printf("Looking for %d in autoconfig\n",c);
397 #endif
398 ac = auto_config;
399 while(ac!=NULL) {
400 if (ac->clabel==NULL) {
401 /* big-time bad news. */
402 goto fail;
403 }
404 if ((ac->clabel->column == c) &&
405 (ac->clabel->mod_counter == mod_counter)) {
406 /* it's this one... */
407 /* flag it as 'used', so we don't
408 free it later. */
409 ac->flag = 1;
410 #if DEBUG
411 printf("Found: %s at %d\n",
412 ac->devname,c);
413 #endif
414
415 break;
416 }
417 ac=ac->next;
418 }
419
420 if (ac==NULL) {
421 /* we didn't find an exact match with a
422 correct mod_counter above... can we find
423 one with an incorrect mod_counter to use
424 instead? (this one, if we find it, will be
425 marked as failed once the set configures)
426 */
427
428 ac = auto_config;
429 while(ac!=NULL) {
430 if (ac->clabel==NULL) {
431 /* big-time bad news. */
432 goto fail;
433 }
434 if (ac->clabel->column == c) {
435 /* it's this one...
436 flag it as 'used', so we
437 don't free it later. */
438 ac->flag = 1;
439 #if DEBUG
440 printf("Found(low mod_counter): %s at %d\n",
441 ac->devname,c);
442 #endif
443
444 break;
445 }
446 ac=ac->next;
447 }
448 }
449
450
451
452 if (ac!=NULL) {
453 /* Found it. Configure it.. */
454 diskPtr->blockSize = ac->clabel->blockSize;
455 diskPtr->numBlocks = ac->clabel->numBlocks;
456 /* Note: rf_protectedSectors is already
457 factored into numBlocks here */
458 raidPtr->raid_cinfo[c].ci_vp = ac->vp;
459 raidPtr->raid_cinfo[c].ci_dev = ac->dev;
460
461 memcpy(raidget_component_label(raidPtr, c),
462 ac->clabel, sizeof(*ac->clabel));
463 snprintf(diskPtr->devname, sizeof(diskPtr->devname),
464 "/dev/%s", ac->devname);
465
466 /* note the fact that this component was
467 autoconfigured. You'll need this info
468 later. Trust me :) */
469 diskPtr->auto_configured = 1;
470 diskPtr->dev = ac->dev;
471
472 /*
473 * we allow the user to specify that
474 * only a fraction of the disks should
475 * be used this is just for debug: it
476 * speeds up the parity scan
477 */
478
479 diskPtr->numBlocks = diskPtr->numBlocks *
480 rf_sizePercentage / 100;
481
482 /* XXX these will get set multiple times,
483 but since we're autoconfiguring, they'd
484 better be always the same each time!
485 If not, this is the least of your worries */
486
487 bs = diskPtr->blockSize;
488 min_numblks = diskPtr->numBlocks;
489
490 /* this gets done multiple times, but that's
491 fine -- the serial number will be the same
492 for all components, guaranteed */
493 raidPtr->serial_number = ac->clabel->serial_number;
494 /* check the last time the label was modified */
495
496 if (ac->clabel->mod_counter != mod_counter) {
497 /* Even though we've filled in all of
498 the above, we don't trust this
499 component since it's modification
500 counter is not in sync with the
501 rest, and we really consider it to
502 be failed. */
503 disks[c].status = rf_ds_failed;
504 numFailuresThisRow++;
505 } else {
506 if (ac->clabel->clean != RF_RAID_CLEAN) {
507 parity_good = RF_RAID_DIRTY;
508 }
509 }
510 } else {
511 /* Didn't find it at all!! Component must
512 really be dead */
513 disks[c].status = rf_ds_failed;
514 snprintf(disks[c].devname, sizeof(disks[c].devname),
515 "component%d", c);
516 numFailuresThisRow++;
517 }
518 }
519 /* XXX fix for n-fault tolerant */
520 /* XXX this should probably check to see how many failures
521 we can handle for this configuration! */
522 if (numFailuresThisRow > 0) {
523 raidPtr->status = rf_rs_degraded;
524 raidPtr->numFailures = numFailuresThisRow;
525 }
526
527 /* close the device for the ones that didn't get used */
528
529 ac = auto_config;
530 while(ac!=NULL) {
531 if (ac->flag == 0) {
532 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
533 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
534 vput(ac->vp);
535 ac->vp = NULL;
536 #if DEBUG
537 printf("Released %s from auto-config set.\n",
538 ac->devname);
539 #endif
540 }
541 ac = ac->next;
542 }
543
544 raidPtr->mod_counter = mod_counter;
545
546 /* note the state of the parity, if any */
547 raidPtr->parity_good = parity_good;
548 raidPtr->sectorsPerDisk = min_numblks;
549 raidPtr->logBytesPerSector = ffs(bs) - 1;
550 raidPtr->bytesPerSector = bs;
551 raidPtr->sectorMask = bs - 1;
552 return (0);
553
554 fail:
555
556 rf_UnconfigureVnodes( raidPtr );
557
558 return (ret);
559
560 }
561
562 /* configure a single disk in the array */
563 int
564 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
565 RF_RowCol_t col)
566 {
567 char *p;
568 struct vnode *vp;
569 struct vattr va;
570 int error;
571
572 p = rf_find_non_white(bf);
573 if (p[strlen(p) - 1] == '\n') {
574 /* strip off the newline */
575 p[strlen(p) - 1] = '\0';
576 }
577 (void) strcpy(diskPtr->devname, p);
578
579 /* Let's start by claiming the component is fine and well... */
580 diskPtr->status = rf_ds_optimal;
581
582 raidPtr->raid_cinfo[col].ci_vp = NULL;
583 raidPtr->raid_cinfo[col].ci_dev = 0;
584
585 if (!strcmp("absent", diskPtr->devname)) {
586 printf("Ignoring missing component at column %d\n", col);
587 sprintf(diskPtr->devname, "component%d", col);
588 diskPtr->status = rf_ds_failed;
589 return (0);
590 }
591
592 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
593 if (error) {
594 printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
595 if (error == ENXIO) {
596 /* the component isn't there... must be dead :-( */
597 diskPtr->status = rf_ds_failed;
598 } else {
599 return (error);
600 }
601 }
602 if (diskPtr->status == rf_ds_optimal) {
603
604 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
605 return (error);
606 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
607 return (error);
608
609 raidPtr->raid_cinfo[col].ci_vp = vp;
610 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
611
612 /* This component was not automatically configured */
613 diskPtr->auto_configured = 0;
614 diskPtr->dev = va.va_rdev;
615
616 /* we allow the user to specify that only a fraction of the
617 * disks should be used this is just for debug: it speeds up
618 * the parity scan */
619 diskPtr->numBlocks = diskPtr->numBlocks *
620 rf_sizePercentage / 100;
621 }
622 return (0);
623 }
624
625 static void
626 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
627 RF_ComponentLabel_t *ci_label)
628 {
629
630 printf("raid%d: Component %s being configured at col: %d\n",
631 raidPtr->raidid, dev_name, column );
632 printf(" Column: %d Num Columns: %d\n",
633 ci_label->column,
634 ci_label->num_columns);
635 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
636 ci_label->version, ci_label->serial_number,
637 ci_label->mod_counter);
638 printf(" Clean: %s Status: %d\n",
639 ci_label->clean ? "Yes" : "No", ci_label->status );
640 }
641
642 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
643 char *dev_name, RF_ComponentLabel_t *ci_label,
644 int serial_number, int mod_counter)
645 {
646 int fatal_error = 0;
647
648 if (serial_number != ci_label->serial_number) {
649 printf("%s has a different serial number: %d %d\n",
650 dev_name, serial_number, ci_label->serial_number);
651 fatal_error = 1;
652 }
653 if (mod_counter != ci_label->mod_counter) {
654 printf("%s has a different modification count: %d %d\n",
655 dev_name, mod_counter, ci_label->mod_counter);
656 }
657
658 if (row != ci_label->row) {
659 printf("Row out of alignment for: %s\n", dev_name);
660 fatal_error = 1;
661 }
662 if (column != ci_label->column) {
663 printf("Column out of alignment for: %s\n", dev_name);
664 fatal_error = 1;
665 }
666 if (raidPtr->numCol != ci_label->num_columns) {
667 printf("Number of columns do not match for: %s\n", dev_name);
668 fatal_error = 1;
669 }
670 if (ci_label->clean == 0) {
671 /* it's not clean, but that's not fatal */
672 printf("%s is not clean!\n", dev_name);
673 }
674 return(fatal_error);
675 }
676
677
678 /*
679
680 rf_CheckLabels() - check all the component labels for consistency.
681 Return an error if there is anything major amiss.
682
683 */
684
685 int
686 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
687 {
688 int c;
689 char *dev_name;
690 RF_ComponentLabel_t *ci_label;
691 int serial_number = 0;
692 int mod_number = 0;
693 int fatal_error = 0;
694 int mod_values[4];
695 int mod_count[4];
696 int ser_values[4];
697 int ser_count[4];
698 int num_ser;
699 int num_mod;
700 int i;
701 int found;
702 int hosed_column;
703 int too_fatal;
704 int parity_good;
705 int force;
706
707 hosed_column = -1;
708 too_fatal = 0;
709 force = cfgPtr->force;
710
711 /*
712 We're going to try to be a little intelligent here. If one
713 component's label is bogus, and we can identify that it's the
714 *only* one that's gone, we'll mark it as "failed" and allow
715 the configuration to proceed. This will be the *only* case
716 that we'll proceed if there would be (otherwise) fatal errors.
717
718 Basically we simply keep a count of how many components had
719 what serial number. If all but one agree, we simply mark
720 the disagreeing component as being failed, and allow
721 things to come up "normally".
722
723 We do this first for serial numbers, and then for "mod_counter".
724
725 */
726
727 num_ser = 0;
728 num_mod = 0;
729
730 for (c = 0; c < raidPtr->numCol; c++) {
731 ci_label = raidget_component_label(raidPtr, c);
732 found=0;
733 for(i=0;i<num_ser;i++) {
734 if (ser_values[i] == ci_label->serial_number) {
735 ser_count[i]++;
736 found=1;
737 break;
738 }
739 }
740 if (!found) {
741 ser_values[num_ser] = ci_label->serial_number;
742 ser_count[num_ser] = 1;
743 num_ser++;
744 if (num_ser>2) {
745 fatal_error = 1;
746 break;
747 }
748 }
749 found=0;
750 for(i=0;i<num_mod;i++) {
751 if (mod_values[i] == ci_label->mod_counter) {
752 mod_count[i]++;
753 found=1;
754 break;
755 }
756 }
757 if (!found) {
758 mod_values[num_mod] = ci_label->mod_counter;
759 mod_count[num_mod] = 1;
760 num_mod++;
761 if (num_mod>2) {
762 fatal_error = 1;
763 break;
764 }
765 }
766 }
767 #if DEBUG
768 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
769 for(i=0;i<num_ser;i++) {
770 printf("%d %d\n", ser_values[i], ser_count[i]);
771 }
772 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
773 for(i=0;i<num_mod;i++) {
774 printf("%d %d\n", mod_values[i], mod_count[i]);
775 }
776 #endif
777 serial_number = ser_values[0];
778 if (num_ser == 2) {
779 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
780 /* Locate the maverick component */
781 if (ser_count[1] > ser_count[0]) {
782 serial_number = ser_values[1];
783 }
784
785 for (c = 0; c < raidPtr->numCol; c++) {
786 ci_label = raidget_component_label(raidPtr, c);
787 if (serial_number != ci_label->serial_number) {
788 hosed_column = c;
789 break;
790 }
791 }
792 printf("Hosed component: %s\n",
793 &cfgPtr->devnames[0][hosed_column][0]);
794 if (!force) {
795 /* we'll fail this component, as if there are
796 other major errors, we arn't forcing things
797 and we'll abort the config anyways */
798 raidPtr->Disks[hosed_column].status
799 = rf_ds_failed;
800 raidPtr->numFailures++;
801 raidPtr->status = rf_rs_degraded;
802 }
803 } else {
804 too_fatal = 1;
805 }
806 if (cfgPtr->parityConfig == '0') {
807 /* We've identified two different serial numbers.
808 RAID 0 can't cope with that, so we'll punt */
809 too_fatal = 1;
810 }
811
812 }
813
814 /* record the serial number for later. If we bail later, setting
815 this doesn't matter, otherwise we've got the best guess at the
816 correct serial number */
817 raidPtr->serial_number = serial_number;
818
819 mod_number = mod_values[0];
820 if (num_mod == 2) {
821 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
822 /* Locate the maverick component */
823 if (mod_count[1] > mod_count[0]) {
824 mod_number = mod_values[1];
825 } else if (mod_count[1] < mod_count[0]) {
826 mod_number = mod_values[0];
827 } else {
828 /* counts of different modification values
829 are the same. Assume greater value is
830 the correct one, all other things
831 considered */
832 if (mod_values[0] > mod_values[1]) {
833 mod_number = mod_values[0];
834 } else {
835 mod_number = mod_values[1];
836 }
837
838 }
839
840 for (c = 0; c < raidPtr->numCol; c++) {
841 ci_label = raidget_component_label(raidPtr, c);
842 if (mod_number != ci_label->mod_counter) {
843 if (hosed_column == c) {
844 /* same one. Can
845 deal with it. */
846 } else {
847 hosed_column = c;
848 if (num_ser != 1) {
849 too_fatal = 1;
850 break;
851 }
852 }
853 }
854 }
855 printf("Hosed component: %s\n",
856 &cfgPtr->devnames[0][hosed_column][0]);
857 if (!force) {
858 /* we'll fail this component, as if there are
859 other major errors, we arn't forcing things
860 and we'll abort the config anyways */
861 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
862 raidPtr->Disks[hosed_column].status
863 = rf_ds_failed;
864 raidPtr->numFailures++;
865 raidPtr->status = rf_rs_degraded;
866 }
867 }
868 } else {
869 too_fatal = 1;
870 }
871 if (cfgPtr->parityConfig == '0') {
872 /* We've identified two different mod counters.
873 RAID 0 can't cope with that, so we'll punt */
874 too_fatal = 1;
875 }
876 }
877
878 raidPtr->mod_counter = mod_number;
879
880 if (too_fatal) {
881 /* we've had both a serial number mismatch, and a mod_counter
882 mismatch -- and they involved two different components!!
883 Bail -- make things fail so that the user must force
884 the issue... */
885 hosed_column = -1;
886 fatal_error = 1;
887 }
888
889 if (num_ser > 2) {
890 printf("raid%d: Too many different serial numbers!\n",
891 raidPtr->raidid);
892 fatal_error = 1;
893 }
894
895 if (num_mod > 2) {
896 printf("raid%d: Too many different mod counters!\n",
897 raidPtr->raidid);
898 fatal_error = 1;
899 }
900
901 /* we start by assuming the parity will be good, and flee from
902 that notion at the slightest sign of trouble */
903
904 parity_good = RF_RAID_CLEAN;
905
906 for (c = 0; c < raidPtr->numCol; c++) {
907 dev_name = &cfgPtr->devnames[0][c][0];
908 ci_label = raidget_component_label(raidPtr, c);
909
910 if (c == hosed_column) {
911 printf("raid%d: Ignoring %s\n",
912 raidPtr->raidid, dev_name);
913 } else {
914 rf_print_label_status( raidPtr, c, dev_name, ci_label);
915 if (rf_check_label_vitals( raidPtr, 0, c,
916 dev_name, ci_label,
917 serial_number,
918 mod_number )) {
919 fatal_error = 1;
920 }
921 if (ci_label->clean != RF_RAID_CLEAN) {
922 parity_good = RF_RAID_DIRTY;
923 }
924 }
925 }
926
927 if (fatal_error) {
928 parity_good = RF_RAID_DIRTY;
929 }
930
931 /* we note the state of the parity */
932 raidPtr->parity_good = parity_good;
933
934 return(fatal_error);
935 }
936
937 int
938 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
939 {
940 RF_RaidDisk_t *disks;
941 RF_DiskQueue_t *spareQueues;
942 int ret;
943 unsigned int bs;
944 int spare_number;
945
946 ret=0;
947
948 if (raidPtr->numSpare >= RF_MAXSPARE) {
949 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
950 return(EINVAL);
951 }
952
953 RF_LOCK_MUTEX(raidPtr->mutex);
954 while (raidPtr->adding_hot_spare==1) {
955 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
956 &(raidPtr->mutex));
957 }
958 raidPtr->adding_hot_spare=1;
959 RF_UNLOCK_MUTEX(raidPtr->mutex);
960
961 /* the beginning of the spares... */
962 disks = &raidPtr->Disks[raidPtr->numCol];
963
964 spare_number = raidPtr->numSpare;
965
966 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
967 &disks[spare_number],
968 raidPtr->numCol + spare_number);
969
970 if (ret)
971 goto fail;
972 if (disks[spare_number].status != rf_ds_optimal) {
973 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
974 sparePtr->component_name);
975 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
976 ret=EINVAL;
977 goto fail;
978 } else {
979 disks[spare_number].status = rf_ds_spare;
980 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
981 spare_number,
982 disks[spare_number].devname,
983 disks[spare_number].numBlocks,
984 disks[spare_number].blockSize,
985 (long int) disks[spare_number].numBlocks *
986 disks[spare_number].blockSize / 1024 / 1024);
987 }
988
989
990 /* check sizes and block sizes on the spare disk */
991 bs = 1 << raidPtr->logBytesPerSector;
992 if (disks[spare_number].blockSize != bs) {
993 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
994 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
995 ret = EINVAL;
996 goto fail;
997 }
998 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
999 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1000 disks[spare_number].devname,
1001 disks[spare_number].blockSize,
1002 raidPtr->sectorsPerDisk);
1003 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1004 ret = EINVAL;
1005 goto fail;
1006 } else {
1007 if (disks[spare_number].numBlocks >
1008 raidPtr->sectorsPerDisk) {
1009 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1010 disks[spare_number].devname,
1011 raidPtr->sectorsPerDisk,
1012 disks[spare_number].numBlocks);
1013
1014 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1015 }
1016 }
1017
1018 spareQueues = &raidPtr->Queues[raidPtr->numCol];
1019 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1020 raidPtr->numCol + spare_number,
1021 raidPtr->qType,
1022 raidPtr->sectorsPerDisk,
1023 raidPtr->Disks[raidPtr->numCol +
1024 spare_number].dev,
1025 raidPtr->maxOutstanding,
1026 &raidPtr->shutdownList,
1027 raidPtr->cleanupList);
1028
1029 RF_LOCK_MUTEX(raidPtr->mutex);
1030 raidPtr->numSpare++;
1031 RF_UNLOCK_MUTEX(raidPtr->mutex);
1032
1033 fail:
1034 RF_LOCK_MUTEX(raidPtr->mutex);
1035 raidPtr->adding_hot_spare=0;
1036 wakeup(&(raidPtr->adding_hot_spare));
1037 RF_UNLOCK_MUTEX(raidPtr->mutex);
1038
1039 return(ret);
1040 }
1041
1042 int
1043 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1044 {
1045 int spare_number;
1046
1047
1048 if (raidPtr->numSpare==0) {
1049 printf("No spares to remove!\n");
1050 return(EINVAL);
1051 }
1052
1053 spare_number = sparePtr->column;
1054
1055 return(EINVAL); /* XXX not implemented yet */
1056 #if 0
1057 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1058 return(EINVAL);
1059 }
1060
1061 /* verify that this spare isn't in use... */
1062
1063
1064
1065
1066 /* it's gone.. */
1067
1068 raidPtr->numSpare--;
1069
1070 return(0);
1071 #endif
1072 }
1073
1074
1075 int
1076 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1077 {
1078 RF_RaidDisk_t *disks;
1079
1080 if ((component->column < 0) ||
1081 (component->column >= raidPtr->numCol)) {
1082 return(EINVAL);
1083 }
1084
1085 disks = &raidPtr->Disks[component->column];
1086
1087 /* 1. This component must be marked as 'failed' */
1088
1089 return(EINVAL); /* Not implemented yet. */
1090 }
1091
1092 int
1093 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1094 RF_SingleComponent_t *component)
1095 {
1096
1097 /* Issues here include how to 'move' this in if there is IO
1098 taking place (e.g. component queues and such) */
1099
1100 return(EINVAL); /* Not implemented yet. */
1101 }
1102