rf_disks.c revision 1.38.10.1 1 /* $NetBSD: rf_disks.c,v 1.38.10.1 2003/10/20 05:25:38 cyber Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.38.10.1 2003/10/20 05:25:38 cyber Exp $");
71
72 #include <dev/raidframe/raidframevar.h>
73
74 #include "rf_raid.h"
75 #include "rf_alloclist.h"
76 #include "rf_utils.h"
77 #include "rf_general.h"
78 #include "rf_options.h"
79 #include "rf_kintf.h"
80 #include "rf_netbsd.h"
81
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/ioctl.h>
86 #include <sys/fcntl.h>
87 #include <sys/vnode.h>
88
89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
90 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *);
92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
93 RF_ComponentLabel_t *, int, int );
94
/*
 * Debug-only printf wrappers: the message is printed only when rf_diskDebug
 * is non-zero.  Each expansion is wrapped in do { } while (0) so that a use
 * is exactly one statement and cannot capture an `else' that follows it.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
97
98 /**************************************************************************
99 *
100 * initialize the disks comprising the array
101 *
102 * We want the spare disks to have regular row,col numbers so that we can
103 * easily substitue a spare for a failed disk. But, the driver code assumes
104 * throughout that the array contains numRow by numCol _non-spare_ disks, so
105 * it's not clear how to fit in the spares. This is an unfortunate holdover
106 * from raidSim. The quick and dirty fix is to make row zero bigger than the
107 * rest, and put all the spares in it. This probably needs to get changed
108 * eventually.
109 *
110 **************************************************************************/
111
112 int
113 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
114 RF_ShutdownList_t **listp;
115 RF_Raid_t *raidPtr;
116 RF_Config_t *cfgPtr;
117 {
118 RF_RaidDisk_t **disks;
119 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
120 RF_RowCol_t r, c;
121 int bs, ret;
122 unsigned i, count, foundone = 0, numFailuresThisRow;
123 int force;
124
125 force = cfgPtr->force;
126
127 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
128 if (ret)
129 goto fail;
130
131 disks = raidPtr->Disks;
132
133 for (r = 0; r < raidPtr->numRow; r++) {
134 numFailuresThisRow = 0;
135 for (c = 0; c < raidPtr->numCol; c++) {
136 ret = rf_ConfigureDisk(raidPtr,
137 &cfgPtr->devnames[r][c][0],
138 &disks[r][c], r, c);
139
140 if (ret)
141 goto fail;
142
143 if (disks[r][c].status == rf_ds_optimal) {
144 raidread_component_label(
145 raidPtr->raid_cinfo[r][c].ci_dev,
146 raidPtr->raid_cinfo[r][c].ci_vp,
147 &raidPtr->raid_cinfo[r][c].ci_label);
148 }
149
150 if (disks[r][c].status != rf_ds_optimal) {
151 numFailuresThisRow++;
152 } else {
153 if (disks[r][c].numBlocks < min_numblks)
154 min_numblks = disks[r][c].numBlocks;
155 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
156 r, c, disks[r][c].devname,
157 (long int) disks[r][c].numBlocks,
158 disks[r][c].blockSize,
159 (long int) disks[r][c].numBlocks *
160 disks[r][c].blockSize / 1024 / 1024);
161 }
162 }
163 /* XXX fix for n-fault tolerant */
164 /* XXX this should probably check to see how many failures
165 we can handle for this configuration! */
166 if (numFailuresThisRow > 0)
167 raidPtr->status[r] = rf_rs_degraded;
168 }
169
170 /* all disks must be the same size & have the same block size, bs must
171 * be a power of 2 */
172 bs = 0;
173 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
174 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
175 if (disks[r][c].status == rf_ds_optimal) {
176 bs = disks[r][c].blockSize;
177 foundone = 1;
178 }
179 }
180 }
181 if (!foundone) {
182 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
183 ret = EINVAL;
184 goto fail;
185 }
186 for (count = 0, i = 1; i; i <<= 1)
187 if (bs & i)
188 count++;
189 if (count != 1) {
190 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
191 ret = EINVAL;
192 goto fail;
193 }
194
195 if (rf_CheckLabels( raidPtr, cfgPtr )) {
196 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
197 if (force != 0) {
198 printf("raid%d: Fatal errors being ignored.\n",
199 raidPtr->raidid);
200 } else {
201 ret = EINVAL;
202 goto fail;
203 }
204 }
205
206 for (r = 0; r < raidPtr->numRow; r++) {
207 for (c = 0; c < raidPtr->numCol; c++) {
208 if (disks[r][c].status == rf_ds_optimal) {
209 if (disks[r][c].blockSize != bs) {
210 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
211 ret = EINVAL;
212 goto fail;
213 }
214 if (disks[r][c].numBlocks != min_numblks) {
215 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
216 r, c, (int) min_numblks);
217 disks[r][c].numBlocks = min_numblks;
218 }
219 }
220 }
221 }
222
223 raidPtr->sectorsPerDisk = min_numblks;
224 raidPtr->logBytesPerSector = ffs(bs) - 1;
225 raidPtr->bytesPerSector = bs;
226 raidPtr->sectorMask = bs - 1;
227 return (0);
228
229 fail:
230
231 rf_UnconfigureVnodes( raidPtr );
232
233 return (ret);
234 }
235
236
237 /****************************************************************************
238 * set up the data structures describing the spare disks in the array
239 * recall from the above comment that the spare disk descriptors are stored
240 * in row zero, which is specially expanded to hold them.
241 ****************************************************************************/
242 int
243 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
244 RF_ShutdownList_t ** listp;
245 RF_Raid_t * raidPtr;
246 RF_Config_t * cfgPtr;
247 {
248 int i, ret;
249 unsigned int bs;
250 RF_RaidDisk_t *disks;
251 int num_spares_done;
252
253 num_spares_done = 0;
254
255 /* The space for the spares should have already been allocated by
256 * ConfigureDisks() */
257
258 disks = &raidPtr->Disks[0][raidPtr->numCol];
259 for (i = 0; i < raidPtr->numSpare; i++) {
260 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
261 &disks[i], 0, raidPtr->numCol + i);
262 if (ret)
263 goto fail;
264 if (disks[i].status != rf_ds_optimal) {
265 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
266 &cfgPtr->spare_names[i][0]);
267 } else {
268 disks[i].status = rf_ds_spare; /* change status to
269 * spare */
270 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
271 disks[i].devname,
272 (long int) disks[i].numBlocks, disks[i].blockSize,
273 (long int) disks[i].numBlocks *
274 disks[i].blockSize / 1024 / 1024);
275 }
276 num_spares_done++;
277 }
278
279 /* check sizes and block sizes on spare disks */
280 bs = 1 << raidPtr->logBytesPerSector;
281 for (i = 0; i < raidPtr->numSpare; i++) {
282 if (disks[i].blockSize != bs) {
283 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
284 ret = EINVAL;
285 goto fail;
286 }
287 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
288 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
289 disks[i].devname, disks[i].blockSize,
290 (long int) raidPtr->sectorsPerDisk);
291 ret = EINVAL;
292 goto fail;
293 } else
294 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
295 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
296
297 disks[i].numBlocks = raidPtr->sectorsPerDisk;
298 }
299 }
300
301 return (0);
302
303 fail:
304
305 /* Release the hold on the main components. We've failed to allocate
306 * a spare, and since we're failing, we need to free things..
307
308 XXX failing to allocate a spare is *not* that big of a deal...
309 We *can* survive without it, if need be, esp. if we get hot
310 adding working.
311
312 If we don't fail out here, then we need a way to remove this spare...
313 that should be easier to do here than if we are "live"...
314
315 */
316
317 rf_UnconfigureVnodes( raidPtr );
318
319 return (ret);
320 }
321
322 static int
323 rf_AllocDiskStructures(raidPtr, cfgPtr)
324 RF_Raid_t *raidPtr;
325 RF_Config_t *cfgPtr;
326 {
327 RF_RaidDisk_t **disks;
328 int ret;
329 int r;
330
331 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
332 (RF_RaidDisk_t **), raidPtr->cleanupList);
333 if (disks == NULL) {
334 ret = ENOMEM;
335 goto fail;
336 }
337 raidPtr->Disks = disks;
338 /* get space for the device-specific stuff... */
339 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
340 sizeof(struct raidcinfo *), (struct raidcinfo **),
341 raidPtr->cleanupList);
342 if (raidPtr->raid_cinfo == NULL) {
343 ret = ENOMEM;
344 goto fail;
345 }
346
347 for (r = 0; r < raidPtr->numRow; r++) {
348 /* We allocate RF_MAXSPARE on the first row so that we
349 have room to do hot-swapping of spares */
350 RF_CallocAndAdd(disks[r], raidPtr->numCol
351 + ((r == 0) ? RF_MAXSPARE : 0),
352 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
353 raidPtr->cleanupList);
354 if (disks[r] == NULL) {
355 ret = ENOMEM;
356 goto fail;
357 }
358 /* get more space for device specific stuff.. */
359 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
360 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
361 sizeof(struct raidcinfo), (struct raidcinfo *),
362 raidPtr->cleanupList);
363 if (raidPtr->raid_cinfo[r] == NULL) {
364 ret = ENOMEM;
365 goto fail;
366 }
367 }
368 return(0);
369 fail:
370 rf_UnconfigureVnodes( raidPtr );
371
372 return(ret);
373 }
374
375
376 /* configure a single disk during auto-configuration at boot */
377 int
378 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
379 RF_Raid_t *raidPtr;
380 RF_Config_t *cfgPtr;
381 RF_AutoConfig_t *auto_config;
382 {
383 RF_RaidDisk_t **disks;
384 RF_RaidDisk_t *diskPtr;
385 RF_RowCol_t r, c;
386 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
387 int bs, ret;
388 int numFailuresThisRow;
389 int force;
390 RF_AutoConfig_t *ac;
391 int parity_good;
392 int mod_counter;
393 int mod_counter_found;
394
395 #if DEBUG
396 printf("Starting autoconfiguration of RAID set...\n");
397 #endif
398 force = cfgPtr->force;
399
400 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
401 if (ret)
402 goto fail;
403
404 disks = raidPtr->Disks;
405
406 /* assume the parity will be fine.. */
407 parity_good = RF_RAID_CLEAN;
408
409 /* Check for mod_counters that are too low */
410 mod_counter_found = 0;
411 mod_counter = 0;
412 ac = auto_config;
413 while(ac!=NULL) {
414 if (mod_counter_found==0) {
415 mod_counter = ac->clabel->mod_counter;
416 mod_counter_found = 1;
417 } else {
418 if (ac->clabel->mod_counter > mod_counter) {
419 mod_counter = ac->clabel->mod_counter;
420 }
421 }
422 ac->flag = 0; /* clear the general purpose flag */
423 ac = ac->next;
424 }
425
426 bs = 0;
427 for (r = 0; r < raidPtr->numRow; r++) {
428 numFailuresThisRow = 0;
429 for (c = 0; c < raidPtr->numCol; c++) {
430 diskPtr = &disks[r][c];
431
432 /* find this row/col in the autoconfig */
433 #if DEBUG
434 printf("Looking for %d,%d in autoconfig\n",r,c);
435 #endif
436 ac = auto_config;
437 while(ac!=NULL) {
438 if (ac->clabel==NULL) {
439 /* big-time bad news. */
440 goto fail;
441 }
442 if ((ac->clabel->row == r) &&
443 (ac->clabel->column == c) &&
444 (ac->clabel->mod_counter == mod_counter)) {
445 /* it's this one... */
446 /* flag it as 'used', so we don't
447 free it later. */
448 ac->flag = 1;
449 #if DEBUG
450 printf("Found: %s at %d,%d\n",
451 ac->devname,r,c);
452 #endif
453
454 break;
455 }
456 ac=ac->next;
457 }
458
459 if (ac==NULL) {
460 /* we didn't find an exact match with a
461 correct mod_counter above... can we
462 find one with an incorrect mod_counter
463 to use instead? (this one, if we find
464 it, will be marked as failed once the
465 set configures)
466 */
467
468 ac = auto_config;
469 while(ac!=NULL) {
470 if (ac->clabel==NULL) {
471 /* big-time bad news. */
472 goto fail;
473 }
474 if ((ac->clabel->row == r) &&
475 (ac->clabel->column == c)) {
476 /* it's this one...
477 flag it as 'used', so we
478 don't free it later. */
479 ac->flag = 1;
480 #if DEBUG
481 printf("Found(low mod_counter): %s at %d,%d\n",
482 ac->devname,r,c);
483 #endif
484
485 break;
486 }
487 ac=ac->next;
488 }
489 }
490
491
492
493 if (ac!=NULL) {
494 /* Found it. Configure it.. */
495 diskPtr->blockSize = ac->clabel->blockSize;
496 diskPtr->numBlocks = ac->clabel->numBlocks;
497 /* Note: rf_protectedSectors is already
498 factored into numBlocks here */
499 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
500 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
501
502 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
503 ac->clabel, sizeof(*ac->clabel));
504 sprintf(diskPtr->devname, "/dev/%s",
505 ac->devname);
506
507 /* note the fact that this component was
508 autoconfigured. You'll need this info
509 later. Trust me :) */
510 diskPtr->auto_configured = 1;
511 diskPtr->dev = ac->dev;
512
513 /*
514 * we allow the user to specify that
515 * only a fraction of the disks should
516 * be used this is just for debug: it
517 * speeds up the parity scan
518 */
519
520 diskPtr->numBlocks = diskPtr->numBlocks *
521 rf_sizePercentage / 100;
522
523 /* XXX these will get set multiple times,
524 but since we're autoconfiguring, they'd
525 better be always the same each time!
526 If not, this is the least of your worries */
527
528 bs = diskPtr->blockSize;
529 min_numblks = diskPtr->numBlocks;
530
531 /* this gets done multiple times, but that's
532 fine -- the serial number will be the same
533 for all components, guaranteed */
534 raidPtr->serial_number =
535 ac->clabel->serial_number;
536 /* check the last time the label
537 was modified */
538 if (ac->clabel->mod_counter !=
539 mod_counter) {
540 /* Even though we've filled in all
541 of the above, we don't trust
542 this component since it's
543 modification counter is not
544 in sync with the rest, and we really
545 consider it to be failed. */
546 disks[r][c].status = rf_ds_failed;
547 numFailuresThisRow++;
548 } else {
549 if (ac->clabel->clean !=
550 RF_RAID_CLEAN) {
551 parity_good = RF_RAID_DIRTY;
552 }
553 }
554 } else {
555 /* Didn't find it at all!!
556 Component must really be dead */
557 disks[r][c].status = rf_ds_failed;
558 sprintf(disks[r][c].devname,"component%d",
559 r * raidPtr->numCol + c);
560 numFailuresThisRow++;
561 }
562 }
563 /* XXX fix for n-fault tolerant */
564 /* XXX this should probably check to see how many failures
565 we can handle for this configuration! */
566 if (numFailuresThisRow > 0)
567 raidPtr->status[r] = rf_rs_degraded;
568 }
569
570 /* close the device for the ones that didn't get used */
571
572 ac = auto_config;
573 while(ac!=NULL) {
574 if (ac->flag == 0) {
575 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
576 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
577 vput(ac->vp);
578 ac->vp = NULL;
579 #if DEBUG
580 printf("Released %s from auto-config set.\n",
581 ac->devname);
582 #endif
583 }
584 ac = ac->next;
585 }
586
587 raidPtr->mod_counter = mod_counter;
588
589 /* note the state of the parity, if any */
590 raidPtr->parity_good = parity_good;
591 raidPtr->sectorsPerDisk = min_numblks;
592 raidPtr->logBytesPerSector = ffs(bs) - 1;
593 raidPtr->bytesPerSector = bs;
594 raidPtr->sectorMask = bs - 1;
595 return (0);
596
597 fail:
598
599 rf_UnconfigureVnodes( raidPtr );
600
601 return (ret);
602
603 }
604
605 /* configure a single disk in the array */
606 int
607 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
608 RF_Raid_t *raidPtr;
609 char *buf;
610 RF_RaidDisk_t *diskPtr;
611 RF_RowCol_t row;
612 RF_RowCol_t col;
613 {
614 char *p;
615 int retcode;
616
617 struct partinfo dpart;
618 struct vnode *vp;
619 struct vattr va;
620 struct proc *proc;
621 int error;
622
623 retcode = 0;
624 p = rf_find_non_white(buf);
625 if (p[strlen(p) - 1] == '\n') {
626 /* strip off the newline */
627 p[strlen(p) - 1] = '\0';
628 }
629 (void) strcpy(diskPtr->devname, p);
630
631 proc = raidPtr->engine_thread;
632
633 /* Let's start by claiming the component is fine and well... */
634 diskPtr->status = rf_ds_optimal;
635
636 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
637 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
638
639 error = raidlookup(diskPtr->devname, proc, &vp);
640 if (error) {
641 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
642 if (error == ENXIO) {
643 /* the component isn't there... must be dead :-( */
644 diskPtr->status = rf_ds_failed;
645 } else {
646 return (error);
647 }
648 }
649 if (diskPtr->status == rf_ds_optimal) {
650
651 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
652 return (error);
653 }
654 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
655 FREAD, proc->p_ucred, proc);
656 if (error) {
657 return (error);
658 }
659
660 diskPtr->blockSize = dpart.disklab->d_secsize;
661
662 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
663 diskPtr->partitionSize = dpart.part->p_size;
664
665 raidPtr->raid_cinfo[row][col].ci_vp = vp;
666 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
667
668 /* This component was not automatically configured */
669 diskPtr->auto_configured = 0;
670 diskPtr->dev = va.va_rdev;
671
672 /* we allow the user to specify that only a fraction of the
673 * disks should be used this is just for debug: it speeds up
674 * the parity scan */
675 diskPtr->numBlocks = diskPtr->numBlocks *
676 rf_sizePercentage / 100;
677 }
678 return (0);
679 }
680
681 static void
682 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
683 RF_Raid_t *raidPtr;
684 int row;
685 int column;
686 char *dev_name;
687 RF_ComponentLabel_t *ci_label;
688 {
689
690 printf("raid%d: Component %s being configured at row: %d col: %d\n",
691 raidPtr->raidid, dev_name, row, column );
692 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
693 ci_label->row, ci_label->column,
694 ci_label->num_rows, ci_label->num_columns);
695 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
696 ci_label->version, ci_label->serial_number,
697 ci_label->mod_counter);
698 printf(" Clean: %s Status: %d\n",
699 ci_label->clean ? "Yes" : "No", ci_label->status );
700 }
701
702 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
703 serial_number, mod_counter )
704 RF_Raid_t *raidPtr;
705 int row;
706 int column;
707 char *dev_name;
708 RF_ComponentLabel_t *ci_label;
709 int serial_number;
710 int mod_counter;
711 {
712 int fatal_error = 0;
713
714 if (serial_number != ci_label->serial_number) {
715 printf("%s has a different serial number: %d %d\n",
716 dev_name, serial_number, ci_label->serial_number);
717 fatal_error = 1;
718 }
719 if (mod_counter != ci_label->mod_counter) {
720 printf("%s has a different modfication count: %d %d\n",
721 dev_name, mod_counter, ci_label->mod_counter);
722 }
723
724 if (row != ci_label->row) {
725 printf("Row out of alignment for: %s\n", dev_name);
726 fatal_error = 1;
727 }
728 if (column != ci_label->column) {
729 printf("Column out of alignment for: %s\n", dev_name);
730 fatal_error = 1;
731 }
732 if (raidPtr->numRow != ci_label->num_rows) {
733 printf("Number of rows do not match for: %s\n", dev_name);
734 fatal_error = 1;
735 }
736 if (raidPtr->numCol != ci_label->num_columns) {
737 printf("Number of columns do not match for: %s\n", dev_name);
738 fatal_error = 1;
739 }
740 if (ci_label->clean == 0) {
741 /* it's not clean, but that's not fatal */
742 printf("%s is not clean!\n", dev_name);
743 }
744 return(fatal_error);
745 }
746
747
748 /*
749
750 rf_CheckLabels() - check all the component labels for consistency.
751 Return an error if there is anything major amiss.
752
753 */
754
755 int
756 rf_CheckLabels( raidPtr, cfgPtr )
757 RF_Raid_t *raidPtr;
758 RF_Config_t *cfgPtr;
759 {
760 int r,c;
761 char *dev_name;
762 RF_ComponentLabel_t *ci_label;
763 int serial_number = 0;
764 int mod_number = 0;
765 int fatal_error = 0;
766 int mod_values[4];
767 int mod_count[4];
768 int ser_values[4];
769 int ser_count[4];
770 int num_ser;
771 int num_mod;
772 int i;
773 int found;
774 int hosed_row;
775 int hosed_column;
776 int too_fatal;
777 int parity_good;
778 int force;
779
780 hosed_row = -1;
781 hosed_column = -1;
782 too_fatal = 0;
783 force = cfgPtr->force;
784
785 /*
786 We're going to try to be a little intelligent here. If one
787 component's label is bogus, and we can identify that it's the
788 *only* one that's gone, we'll mark it as "failed" and allow
789 the configuration to proceed. This will be the *only* case
790 that we'll proceed if there would be (otherwise) fatal errors.
791
792 Basically we simply keep a count of how many components had
793 what serial number. If all but one agree, we simply mark
794 the disagreeing component as being failed, and allow
795 things to come up "normally".
796
797 We do this first for serial numbers, and then for "mod_counter".
798
799 */
800
801 num_ser = 0;
802 num_mod = 0;
803 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
804 for (c = 0; c < raidPtr->numCol; c++) {
805 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
806 found=0;
807 for(i=0;i<num_ser;i++) {
808 if (ser_values[i] == ci_label->serial_number) {
809 ser_count[i]++;
810 found=1;
811 break;
812 }
813 }
814 if (!found) {
815 ser_values[num_ser] = ci_label->serial_number;
816 ser_count[num_ser] = 1;
817 num_ser++;
818 if (num_ser>2) {
819 fatal_error = 1;
820 break;
821 }
822 }
823 found=0;
824 for(i=0;i<num_mod;i++) {
825 if (mod_values[i] == ci_label->mod_counter) {
826 mod_count[i]++;
827 found=1;
828 break;
829 }
830 }
831 if (!found) {
832 mod_values[num_mod] = ci_label->mod_counter;
833 mod_count[num_mod] = 1;
834 num_mod++;
835 if (num_mod>2) {
836 fatal_error = 1;
837 break;
838 }
839 }
840 }
841 }
842 #if DEBUG
843 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
844 for(i=0;i<num_ser;i++) {
845 printf("%d %d\n", ser_values[i], ser_count[i]);
846 }
847 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
848 for(i=0;i<num_mod;i++) {
849 printf("%d %d\n", mod_values[i], mod_count[i]);
850 }
851 #endif
852 serial_number = ser_values[0];
853 if (num_ser == 2) {
854 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
855 /* Locate the maverick component */
856 if (ser_count[1] > ser_count[0]) {
857 serial_number = ser_values[1];
858 }
859 for (r = 0; r < raidPtr->numRow; r++) {
860 for (c = 0; c < raidPtr->numCol; c++) {
861 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
862 if (serial_number !=
863 ci_label->serial_number) {
864 hosed_row = r;
865 hosed_column = c;
866 break;
867 }
868 }
869 }
870 printf("Hosed component: %s\n",
871 &cfgPtr->devnames[hosed_row][hosed_column][0]);
872 if (!force) {
873 /* we'll fail this component, as if there are
874 other major errors, we arn't forcing things
875 and we'll abort the config anyways */
876 raidPtr->Disks[hosed_row][hosed_column].status
877 = rf_ds_failed;
878 raidPtr->numFailures++;
879 raidPtr->status[hosed_row] = rf_rs_degraded;
880 }
881 } else {
882 too_fatal = 1;
883 }
884 if (cfgPtr->parityConfig == '0') {
885 /* We've identified two different serial numbers.
886 RAID 0 can't cope with that, so we'll punt */
887 too_fatal = 1;
888 }
889
890 }
891
892 /* record the serial number for later. If we bail later, setting
893 this doesn't matter, otherwise we've got the best guess at the
894 correct serial number */
895 raidPtr->serial_number = serial_number;
896
897 mod_number = mod_values[0];
898 if (num_mod == 2) {
899 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
900 /* Locate the maverick component */
901 if (mod_count[1] > mod_count[0]) {
902 mod_number = mod_values[1];
903 } else if (mod_count[1] < mod_count[0]) {
904 mod_number = mod_values[0];
905 } else {
906 /* counts of different modification values
907 are the same. Assume greater value is
908 the correct one, all other things
909 considered */
910 if (mod_values[0] > mod_values[1]) {
911 mod_number = mod_values[0];
912 } else {
913 mod_number = mod_values[1];
914 }
915
916 }
917 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
918 for (c = 0; c < raidPtr->numCol; c++) {
919 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
920 if (mod_number !=
921 ci_label->mod_counter) {
922 if ( ( hosed_row == r ) &&
923 ( hosed_column == c )) {
924 /* same one. Can
925 deal with it. */
926 } else {
927 hosed_row = r;
928 hosed_column = c;
929 if (num_ser != 1) {
930 too_fatal = 1;
931 break;
932 }
933 }
934 }
935 }
936 }
937 printf("Hosed component: %s\n",
938 &cfgPtr->devnames[hosed_row][hosed_column][0]);
939 if (!force) {
940 /* we'll fail this component, as if there are
941 other major errors, we arn't forcing things
942 and we'll abort the config anyways */
943 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
944 raidPtr->Disks[hosed_row][hosed_column].status
945 = rf_ds_failed;
946 raidPtr->numFailures++;
947 raidPtr->status[hosed_row] = rf_rs_degraded;
948 }
949 }
950 } else {
951 too_fatal = 1;
952 }
953 if (cfgPtr->parityConfig == '0') {
954 /* We've identified two different mod counters.
955 RAID 0 can't cope with that, so we'll punt */
956 too_fatal = 1;
957 }
958 }
959
960 raidPtr->mod_counter = mod_number;
961
962 if (too_fatal) {
963 /* we've had both a serial number mismatch, and a mod_counter
964 mismatch -- and they involved two different components!!
965 Bail -- make things fail so that the user must force
966 the issue... */
967 hosed_row = -1;
968 hosed_column = -1;
969 }
970
971 if (num_ser > 2) {
972 printf("raid%d: Too many different serial numbers!\n",
973 raidPtr->raidid);
974 }
975
976 if (num_mod > 2) {
977 printf("raid%d: Too many different mod counters!\n",
978 raidPtr->raidid);
979 }
980
981 /* we start by assuming the parity will be good, and flee from
982 that notion at the slightest sign of trouble */
983
984 parity_good = RF_RAID_CLEAN;
985 for (r = 0; r < raidPtr->numRow; r++) {
986 for (c = 0; c < raidPtr->numCol; c++) {
987 dev_name = &cfgPtr->devnames[r][c][0];
988 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
989
990 if ((r == hosed_row) && (c == hosed_column)) {
991 printf("raid%d: Ignoring %s\n",
992 raidPtr->raidid, dev_name);
993 } else {
994 rf_print_label_status( raidPtr, r, c,
995 dev_name, ci_label );
996 if (rf_check_label_vitals( raidPtr, r, c,
997 dev_name, ci_label,
998 serial_number,
999 mod_number )) {
1000 fatal_error = 1;
1001 }
1002 if (ci_label->clean != RF_RAID_CLEAN) {
1003 parity_good = RF_RAID_DIRTY;
1004 }
1005 }
1006 }
1007 }
1008 if (fatal_error) {
1009 parity_good = RF_RAID_DIRTY;
1010 }
1011
1012 /* we note the state of the parity */
1013 raidPtr->parity_good = parity_good;
1014
1015 return(fatal_error);
1016 }
1017
1018 int
1019 rf_add_hot_spare(raidPtr, sparePtr)
1020 RF_Raid_t *raidPtr;
1021 RF_SingleComponent_t *sparePtr;
1022 {
1023 RF_RaidDisk_t *disks;
1024 RF_DiskQueue_t *spareQueues;
1025 int ret;
1026 unsigned int bs;
1027 int spare_number;
1028
1029 if (raidPtr->numSpare >= RF_MAXSPARE) {
1030 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1031 return(EINVAL);
1032 }
1033
1034 RF_LOCK_MUTEX(raidPtr->mutex);
1035
1036 /* the beginning of the spares... */
1037 disks = &raidPtr->Disks[0][raidPtr->numCol];
1038
1039 spare_number = raidPtr->numSpare;
1040
1041 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1042 &disks[spare_number], 0,
1043 raidPtr->numCol + spare_number);
1044
1045 if (ret)
1046 goto fail;
1047 if (disks[spare_number].status != rf_ds_optimal) {
1048 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1049 sparePtr->component_name);
1050 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1051 ret=EINVAL;
1052 goto fail;
1053 } else {
1054 disks[spare_number].status = rf_ds_spare;
1055 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1056 disks[spare_number].devname,
1057 (long int) disks[spare_number].numBlocks,
1058 disks[spare_number].blockSize,
1059 (long int) disks[spare_number].numBlocks *
1060 disks[spare_number].blockSize / 1024 / 1024);
1061 }
1062
1063
1064 /* check sizes and block sizes on the spare disk */
1065 bs = 1 << raidPtr->logBytesPerSector;
1066 if (disks[spare_number].blockSize != bs) {
1067 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1068 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1069 ret = EINVAL;
1070 goto fail;
1071 }
1072 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1073 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1074 disks[spare_number].devname,
1075 disks[spare_number].blockSize,
1076 (long int) raidPtr->sectorsPerDisk);
1077 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1078 ret = EINVAL;
1079 goto fail;
1080 } else {
1081 if (disks[spare_number].numBlocks >
1082 raidPtr->sectorsPerDisk) {
1083 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1084 (long int) raidPtr->sectorsPerDisk);
1085
1086 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1087 }
1088 }
1089
1090 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1091 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1092 0, raidPtr->numCol + spare_number,
1093 raidPtr->qType,
1094 raidPtr->sectorsPerDisk,
1095 raidPtr->Disks[0][raidPtr->numCol +
1096 spare_number].dev,
1097 raidPtr->maxOutstanding,
1098 &raidPtr->shutdownList,
1099 raidPtr->cleanupList);
1100
1101
1102 raidPtr->numSpare++;
1103 RF_UNLOCK_MUTEX(raidPtr->mutex);
1104 return (0);
1105
1106 fail:
1107 RF_UNLOCK_MUTEX(raidPtr->mutex);
1108 return(ret);
1109 }
1110
1111 int
1112 rf_remove_hot_spare(raidPtr,sparePtr)
1113 RF_Raid_t *raidPtr;
1114 RF_SingleComponent_t *sparePtr;
1115 {
1116 int spare_number;
1117
1118
1119 if (raidPtr->numSpare==0) {
1120 printf("No spares to remove!\n");
1121 return(EINVAL);
1122 }
1123
1124 spare_number = sparePtr->column;
1125
1126 return(EINVAL); /* XXX not implemented yet */
1127 #if 0
1128 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1129 return(EINVAL);
1130 }
1131
1132 /* verify that this spare isn't in use... */
1133
1134
1135
1136
1137 /* it's gone.. */
1138
1139 raidPtr->numSpare--;
1140
1141 return(0);
1142 #endif
1143 }
1144
1145
1146 int
1147 rf_delete_component(raidPtr,component)
1148 RF_Raid_t *raidPtr;
1149 RF_SingleComponent_t *component;
1150 {
1151 RF_RaidDisk_t *disks;
1152
1153 if ((component->row < 0) ||
1154 (component->row >= raidPtr->numRow) ||
1155 (component->column < 0) ||
1156 (component->column >= raidPtr->numCol)) {
1157 return(EINVAL);
1158 }
1159
1160 disks = &raidPtr->Disks[component->row][component->column];
1161
1162 /* 1. This component must be marked as 'failed' */
1163
1164 return(EINVAL); /* Not implemented yet. */
1165 }
1166
1167 int
1168 rf_incorporate_hot_spare(raidPtr,component)
1169 RF_Raid_t *raidPtr;
1170 RF_SingleComponent_t *component;
1171 {
1172
1173 /* Issues here include how to 'move' this in if there is IO
1174 taking place (e.g. component queues and such) */
1175
1176 return(EINVAL); /* Not implemented yet. */
1177 }
1178