rf_disks.c revision 1.34.2.2 1 /* $NetBSD: rf_disks.c,v 1.34.2.2 2001/11/14 19:15:48 nathanw Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.34.2.2 2001/11/14 19:15:48 nathanw Exp $");
71
72 #include <dev/raidframe/raidframevar.h>
73
74 #include "rf_raid.h"
75 #include "rf_alloclist.h"
76 #include "rf_utils.h"
77 #include "rf_general.h"
78 #include "rf_options.h"
79 #include "rf_kintf.h"
80 #include "rf_netbsd.h"
81
82 #include <sys/types.h>
83 #include <sys/param.h>
84 #include <sys/systm.h>
85 #include <sys/proc.h>
86 #include <sys/ioctl.h>
87 #include <sys/fcntl.h>
88 #include <sys/vnode.h>
89
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
92 RF_ComponentLabel_t *);
93 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
94 RF_ComponentLabel_t *, int, int );
95
/*
 * Debug printf wrappers taking a format string plus five (DPRINTF6) or
 * six (DPRINTF7) arguments.  Nothing is printed unless rf_diskDebug is
 * non-zero.
 *
 * Each macro expands to a single do { } while (0) statement so that it
 * can be used as the body of an unbraced if/else without its internal
 * "if" capturing the caller's "else".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
98
99 /**************************************************************************
100 *
101 * initialize the disks comprising the array
102 *
103 * We want the spare disks to have regular row,col numbers so that we can
104 * easily substitue a spare for a failed disk. But, the driver code assumes
105 * throughout that the array contains numRow by numCol _non-spare_ disks, so
106 * it's not clear how to fit in the spares. This is an unfortunate holdover
107 * from raidSim. The quick and dirty fix is to make row zero bigger than the
108 * rest, and put all the spares in it. This probably needs to get changed
109 * eventually.
110 *
111 **************************************************************************/
112
113 int
114 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
115 RF_ShutdownList_t **listp;
116 RF_Raid_t *raidPtr;
117 RF_Config_t *cfgPtr;
118 {
119 RF_RaidDisk_t **disks;
120 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
121 RF_RowCol_t r, c;
122 int bs, ret;
123 unsigned i, count, foundone = 0, numFailuresThisRow;
124 int force;
125
126 force = cfgPtr->force;
127
128 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
129 if (ret)
130 goto fail;
131
132 disks = raidPtr->Disks;
133
134 for (r = 0; r < raidPtr->numRow; r++) {
135 numFailuresThisRow = 0;
136 for (c = 0; c < raidPtr->numCol; c++) {
137 ret = rf_ConfigureDisk(raidPtr,
138 &cfgPtr->devnames[r][c][0],
139 &disks[r][c], r, c);
140
141 if (ret)
142 goto fail;
143
144 if (disks[r][c].status == rf_ds_optimal) {
145 raidread_component_label(
146 raidPtr->raid_cinfo[r][c].ci_dev,
147 raidPtr->raid_cinfo[r][c].ci_vp,
148 &raidPtr->raid_cinfo[r][c].ci_label);
149 }
150
151 if (disks[r][c].status != rf_ds_optimal) {
152 numFailuresThisRow++;
153 } else {
154 if (disks[r][c].numBlocks < min_numblks)
155 min_numblks = disks[r][c].numBlocks;
156 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
157 r, c, disks[r][c].devname,
158 (long int) disks[r][c].numBlocks,
159 disks[r][c].blockSize,
160 (long int) disks[r][c].numBlocks *
161 disks[r][c].blockSize / 1024 / 1024);
162 }
163 }
164 /* XXX fix for n-fault tolerant */
165 /* XXX this should probably check to see how many failures
166 we can handle for this configuration! */
167 if (numFailuresThisRow > 0)
168 raidPtr->status[r] = rf_rs_degraded;
169 }
170
171 /* all disks must be the same size & have the same block size, bs must
172 * be a power of 2 */
173 bs = 0;
174 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
175 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
176 if (disks[r][c].status == rf_ds_optimal) {
177 bs = disks[r][c].blockSize;
178 foundone = 1;
179 }
180 }
181 }
182 if (!foundone) {
183 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
184 ret = EINVAL;
185 goto fail;
186 }
187 for (count = 0, i = 1; i; i <<= 1)
188 if (bs & i)
189 count++;
190 if (count != 1) {
191 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
192 ret = EINVAL;
193 goto fail;
194 }
195
196 if (rf_CheckLabels( raidPtr, cfgPtr )) {
197 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
198 if (force != 0) {
199 printf("raid%d: Fatal errors being ignored.\n",
200 raidPtr->raidid);
201 } else {
202 ret = EINVAL;
203 goto fail;
204 }
205 }
206
207 for (r = 0; r < raidPtr->numRow; r++) {
208 for (c = 0; c < raidPtr->numCol; c++) {
209 if (disks[r][c].status == rf_ds_optimal) {
210 if (disks[r][c].blockSize != bs) {
211 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
212 ret = EINVAL;
213 goto fail;
214 }
215 if (disks[r][c].numBlocks != min_numblks) {
216 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
217 r, c, (int) min_numblks);
218 disks[r][c].numBlocks = min_numblks;
219 }
220 }
221 }
222 }
223
224 raidPtr->sectorsPerDisk = min_numblks;
225 raidPtr->logBytesPerSector = ffs(bs) - 1;
226 raidPtr->bytesPerSector = bs;
227 raidPtr->sectorMask = bs - 1;
228 return (0);
229
230 fail:
231
232 rf_UnconfigureVnodes( raidPtr );
233
234 return (ret);
235 }
236
237
238 /****************************************************************************
239 * set up the data structures describing the spare disks in the array
240 * recall from the above comment that the spare disk descriptors are stored
241 * in row zero, which is specially expanded to hold them.
242 ****************************************************************************/
243 int
244 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
245 RF_ShutdownList_t ** listp;
246 RF_Raid_t * raidPtr;
247 RF_Config_t * cfgPtr;
248 {
249 int i, ret;
250 unsigned int bs;
251 RF_RaidDisk_t *disks;
252 int num_spares_done;
253
254 num_spares_done = 0;
255
256 /* The space for the spares should have already been allocated by
257 * ConfigureDisks() */
258
259 disks = &raidPtr->Disks[0][raidPtr->numCol];
260 for (i = 0; i < raidPtr->numSpare; i++) {
261 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
262 &disks[i], 0, raidPtr->numCol + i);
263 if (ret)
264 goto fail;
265 if (disks[i].status != rf_ds_optimal) {
266 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
267 &cfgPtr->spare_names[i][0]);
268 } else {
269 disks[i].status = rf_ds_spare; /* change status to
270 * spare */
271 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
272 disks[i].devname,
273 (long int) disks[i].numBlocks, disks[i].blockSize,
274 (long int) disks[i].numBlocks *
275 disks[i].blockSize / 1024 / 1024);
276 }
277 num_spares_done++;
278 }
279
280 /* check sizes and block sizes on spare disks */
281 bs = 1 << raidPtr->logBytesPerSector;
282 for (i = 0; i < raidPtr->numSpare; i++) {
283 if (disks[i].blockSize != bs) {
284 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
285 ret = EINVAL;
286 goto fail;
287 }
288 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
289 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
290 disks[i].devname, disks[i].blockSize,
291 (long int) raidPtr->sectorsPerDisk);
292 ret = EINVAL;
293 goto fail;
294 } else
295 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
296 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
297
298 disks[i].numBlocks = raidPtr->sectorsPerDisk;
299 }
300 }
301
302 return (0);
303
304 fail:
305
306 /* Release the hold on the main components. We've failed to allocate
307 * a spare, and since we're failing, we need to free things..
308
309 XXX failing to allocate a spare is *not* that big of a deal...
310 We *can* survive without it, if need be, esp. if we get hot
311 adding working.
312
313 If we don't fail out here, then we need a way to remove this spare...
314 that should be easier to do here than if we are "live"...
315
316 */
317
318 rf_UnconfigureVnodes( raidPtr );
319
320 return (ret);
321 }
322
323 static int
324 rf_AllocDiskStructures(raidPtr, cfgPtr)
325 RF_Raid_t *raidPtr;
326 RF_Config_t *cfgPtr;
327 {
328 RF_RaidDisk_t **disks;
329 int ret;
330 int r;
331
332 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
333 (RF_RaidDisk_t **), raidPtr->cleanupList);
334 if (disks == NULL) {
335 ret = ENOMEM;
336 goto fail;
337 }
338 raidPtr->Disks = disks;
339 /* get space for the device-specific stuff... */
340 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
341 sizeof(struct raidcinfo *), (struct raidcinfo **),
342 raidPtr->cleanupList);
343 if (raidPtr->raid_cinfo == NULL) {
344 ret = ENOMEM;
345 goto fail;
346 }
347
348 for (r = 0; r < raidPtr->numRow; r++) {
349 /* We allocate RF_MAXSPARE on the first row so that we
350 have room to do hot-swapping of spares */
351 RF_CallocAndAdd(disks[r], raidPtr->numCol
352 + ((r == 0) ? RF_MAXSPARE : 0),
353 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
354 raidPtr->cleanupList);
355 if (disks[r] == NULL) {
356 ret = ENOMEM;
357 goto fail;
358 }
359 /* get more space for device specific stuff.. */
360 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
361 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
362 sizeof(struct raidcinfo), (struct raidcinfo *),
363 raidPtr->cleanupList);
364 if (raidPtr->raid_cinfo[r] == NULL) {
365 ret = ENOMEM;
366 goto fail;
367 }
368 }
369 return(0);
370 fail:
371 rf_UnconfigureVnodes( raidPtr );
372
373 return(ret);
374 }
375
376
377 /* configure a single disk during auto-configuration at boot */
378 int
379 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
380 RF_Raid_t *raidPtr;
381 RF_Config_t *cfgPtr;
382 RF_AutoConfig_t *auto_config;
383 {
384 RF_RaidDisk_t **disks;
385 RF_RaidDisk_t *diskPtr;
386 RF_RowCol_t r, c;
387 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
388 int bs, ret;
389 int numFailuresThisRow;
390 int force;
391 RF_AutoConfig_t *ac;
392 int parity_good;
393 int mod_counter;
394 int mod_counter_found;
395
396 #if DEBUG
397 printf("Starting autoconfiguration of RAID set...\n");
398 #endif
399 force = cfgPtr->force;
400
401 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
402 if (ret)
403 goto fail;
404
405 disks = raidPtr->Disks;
406
407 /* assume the parity will be fine.. */
408 parity_good = RF_RAID_CLEAN;
409
410 /* Check for mod_counters that are too low */
411 mod_counter_found = 0;
412 mod_counter = 0;
413 ac = auto_config;
414 while(ac!=NULL) {
415 if (mod_counter_found==0) {
416 mod_counter = ac->clabel->mod_counter;
417 mod_counter_found = 1;
418 } else {
419 if (ac->clabel->mod_counter > mod_counter) {
420 mod_counter = ac->clabel->mod_counter;
421 }
422 }
423 ac->flag = 0; /* clear the general purpose flag */
424 ac = ac->next;
425 }
426
427 bs = 0;
428 for (r = 0; r < raidPtr->numRow; r++) {
429 numFailuresThisRow = 0;
430 for (c = 0; c < raidPtr->numCol; c++) {
431 diskPtr = &disks[r][c];
432
433 /* find this row/col in the autoconfig */
434 #if DEBUG
435 printf("Looking for %d,%d in autoconfig\n",r,c);
436 #endif
437 ac = auto_config;
438 while(ac!=NULL) {
439 if (ac->clabel==NULL) {
440 /* big-time bad news. */
441 goto fail;
442 }
443 if ((ac->clabel->row == r) &&
444 (ac->clabel->column == c) &&
445 (ac->clabel->mod_counter == mod_counter)) {
446 /* it's this one... */
447 /* flag it as 'used', so we don't
448 free it later. */
449 ac->flag = 1;
450 #if DEBUG
451 printf("Found: %s at %d,%d\n",
452 ac->devname,r,c);
453 #endif
454
455 break;
456 }
457 ac=ac->next;
458 }
459
460 if (ac==NULL) {
461 /* we didn't find an exact match with a
462 correct mod_counter above... can we
463 find one with an incorrect mod_counter
464 to use instead? (this one, if we find
465 it, will be marked as failed once the
466 set configures)
467 */
468
469 ac = auto_config;
470 while(ac!=NULL) {
471 if (ac->clabel==NULL) {
472 /* big-time bad news. */
473 goto fail;
474 }
475 if ((ac->clabel->row == r) &&
476 (ac->clabel->column == c)) {
477 /* it's this one...
478 flag it as 'used', so we
479 don't free it later. */
480 ac->flag = 1;
481 #if DEBUG
482 printf("Found(low mod_counter): %s at %d,%d\n",
483 ac->devname,r,c);
484 #endif
485
486 break;
487 }
488 ac=ac->next;
489 }
490 }
491
492
493
494 if (ac!=NULL) {
495 /* Found it. Configure it.. */
496 diskPtr->blockSize = ac->clabel->blockSize;
497 diskPtr->numBlocks = ac->clabel->numBlocks;
498 /* Note: rf_protectedSectors is already
499 factored into numBlocks here */
500 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
501 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
502
503 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
504 ac->clabel, sizeof(*ac->clabel));
505 sprintf(diskPtr->devname, "/dev/%s",
506 ac->devname);
507
508 /* note the fact that this component was
509 autoconfigured. You'll need this info
510 later. Trust me :) */
511 diskPtr->auto_configured = 1;
512 diskPtr->dev = ac->dev;
513
514 /*
515 * we allow the user to specify that
516 * only a fraction of the disks should
517 * be used this is just for debug: it
518 * speeds up the parity scan
519 */
520
521 diskPtr->numBlocks = diskPtr->numBlocks *
522 rf_sizePercentage / 100;
523
524 /* XXX these will get set multiple times,
525 but since we're autoconfiguring, they'd
526 better be always the same each time!
527 If not, this is the least of your worries */
528
529 bs = diskPtr->blockSize;
530 min_numblks = diskPtr->numBlocks;
531
532 /* this gets done multiple times, but that's
533 fine -- the serial number will be the same
534 for all components, guaranteed */
535 raidPtr->serial_number =
536 ac->clabel->serial_number;
537 /* check the last time the label
538 was modified */
539 if (ac->clabel->mod_counter !=
540 mod_counter) {
541 /* Even though we've filled in all
542 of the above, we don't trust
543 this component since it's
544 modification counter is not
545 in sync with the rest, and we really
546 consider it to be failed. */
547 disks[r][c].status = rf_ds_failed;
548 numFailuresThisRow++;
549 } else {
550 if (ac->clabel->clean !=
551 RF_RAID_CLEAN) {
552 parity_good = RF_RAID_DIRTY;
553 }
554 }
555 } else {
556 /* Didn't find it at all!!
557 Component must really be dead */
558 disks[r][c].status = rf_ds_failed;
559 sprintf(disks[r][c].devname,"component%d",
560 r * raidPtr->numCol + c);
561 numFailuresThisRow++;
562 }
563 }
564 /* XXX fix for n-fault tolerant */
565 /* XXX this should probably check to see how many failures
566 we can handle for this configuration! */
567 if (numFailuresThisRow > 0)
568 raidPtr->status[r] = rf_rs_degraded;
569 }
570
571 /* close the device for the ones that didn't get used */
572
573 ac = auto_config;
574 while(ac!=NULL) {
575 if (ac->flag == 0) {
576 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
577 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
578 vput(ac->vp);
579 ac->vp = NULL;
580 #if DEBUG
581 printf("Released %s from auto-config set.\n",
582 ac->devname);
583 #endif
584 }
585 ac = ac->next;
586 }
587
588 raidPtr->mod_counter = mod_counter;
589
590 /* note the state of the parity, if any */
591 raidPtr->parity_good = parity_good;
592 raidPtr->sectorsPerDisk = min_numblks;
593 raidPtr->logBytesPerSector = ffs(bs) - 1;
594 raidPtr->bytesPerSector = bs;
595 raidPtr->sectorMask = bs - 1;
596 return (0);
597
598 fail:
599
600 rf_UnconfigureVnodes( raidPtr );
601
602 return (ret);
603
604 }
605
606 /* configure a single disk in the array */
607 int
608 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
609 RF_Raid_t *raidPtr;
610 char *buf;
611 RF_RaidDisk_t *diskPtr;
612 RF_RowCol_t row;
613 RF_RowCol_t col;
614 {
615 char *p;
616 int retcode;
617
618 struct partinfo dpart;
619 struct vnode *vp;
620 struct vattr va;
621 struct proc *proc;
622 int error;
623
624 retcode = 0;
625 p = rf_find_non_white(buf);
626 if (p[strlen(p) - 1] == '\n') {
627 /* strip off the newline */
628 p[strlen(p) - 1] = '\0';
629 }
630 (void) strcpy(diskPtr->devname, p);
631
632 proc = raidPtr->engine_thread;
633
634 /* Let's start by claiming the component is fine and well... */
635 diskPtr->status = rf_ds_optimal;
636
637 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
638 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
639
640 error = raidlookup(diskPtr->devname, proc, &vp);
641 if (error) {
642 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
643 if (error == ENXIO) {
644 /* the component isn't there... must be dead :-( */
645 diskPtr->status = rf_ds_failed;
646 } else {
647 return (error);
648 }
649 }
650 if (diskPtr->status == rf_ds_optimal) {
651
652 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
653 return (error);
654 }
655 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
656 FREAD, proc->p_ucred, proc);
657 if (error) {
658 return (error);
659 }
660
661 diskPtr->blockSize = dpart.disklab->d_secsize;
662
663 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
664 diskPtr->partitionSize = dpart.part->p_size;
665
666 raidPtr->raid_cinfo[row][col].ci_vp = vp;
667 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
668
669 /* This component was not automatically configured */
670 diskPtr->auto_configured = 0;
671 diskPtr->dev = va.va_rdev;
672
673 /* we allow the user to specify that only a fraction of the
674 * disks should be used this is just for debug: it speeds up
675 * the parity scan */
676 diskPtr->numBlocks = diskPtr->numBlocks *
677 rf_sizePercentage / 100;
678 }
679 return (0);
680 }
681
682 static void
683 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
684 RF_Raid_t *raidPtr;
685 int row;
686 int column;
687 char *dev_name;
688 RF_ComponentLabel_t *ci_label;
689 {
690
691 printf("raid%d: Component %s being configured at row: %d col: %d\n",
692 raidPtr->raidid, dev_name, row, column );
693 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
694 ci_label->row, ci_label->column,
695 ci_label->num_rows, ci_label->num_columns);
696 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
697 ci_label->version, ci_label->serial_number,
698 ci_label->mod_counter);
699 printf(" Clean: %s Status: %d\n",
700 ci_label->clean ? "Yes" : "No", ci_label->status );
701 }
702
703 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
704 serial_number, mod_counter )
705 RF_Raid_t *raidPtr;
706 int row;
707 int column;
708 char *dev_name;
709 RF_ComponentLabel_t *ci_label;
710 int serial_number;
711 int mod_counter;
712 {
713 int fatal_error = 0;
714
715 if (serial_number != ci_label->serial_number) {
716 printf("%s has a different serial number: %d %d\n",
717 dev_name, serial_number, ci_label->serial_number);
718 fatal_error = 1;
719 }
720 if (mod_counter != ci_label->mod_counter) {
721 printf("%s has a different modfication count: %d %d\n",
722 dev_name, mod_counter, ci_label->mod_counter);
723 }
724
725 if (row != ci_label->row) {
726 printf("Row out of alignment for: %s\n", dev_name);
727 fatal_error = 1;
728 }
729 if (column != ci_label->column) {
730 printf("Column out of alignment for: %s\n", dev_name);
731 fatal_error = 1;
732 }
733 if (raidPtr->numRow != ci_label->num_rows) {
734 printf("Number of rows do not match for: %s\n", dev_name);
735 fatal_error = 1;
736 }
737 if (raidPtr->numCol != ci_label->num_columns) {
738 printf("Number of columns do not match for: %s\n", dev_name);
739 fatal_error = 1;
740 }
741 if (ci_label->clean == 0) {
742 /* it's not clean, but that's not fatal */
743 printf("%s is not clean!\n", dev_name);
744 }
745 return(fatal_error);
746 }
747
748
749 /*
750
751 rf_CheckLabels() - check all the component labels for consistency.
752 Return an error if there is anything major amiss.
753
754 */
755
756 int
757 rf_CheckLabels( raidPtr, cfgPtr )
758 RF_Raid_t *raidPtr;
759 RF_Config_t *cfgPtr;
760 {
761 int r,c;
762 char *dev_name;
763 RF_ComponentLabel_t *ci_label;
764 int serial_number = 0;
765 int mod_number = 0;
766 int fatal_error = 0;
767 int mod_values[4];
768 int mod_count[4];
769 int ser_values[4];
770 int ser_count[4];
771 int num_ser;
772 int num_mod;
773 int i;
774 int found;
775 int hosed_row;
776 int hosed_column;
777 int too_fatal;
778 int parity_good;
779 int force;
780
781 hosed_row = -1;
782 hosed_column = -1;
783 too_fatal = 0;
784 force = cfgPtr->force;
785
786 /*
787 We're going to try to be a little intelligent here. If one
788 component's label is bogus, and we can identify that it's the
789 *only* one that's gone, we'll mark it as "failed" and allow
790 the configuration to proceed. This will be the *only* case
791 that we'll proceed if there would be (otherwise) fatal errors.
792
793 Basically we simply keep a count of how many components had
794 what serial number. If all but one agree, we simply mark
795 the disagreeing component as being failed, and allow
796 things to come up "normally".
797
798 We do this first for serial numbers, and then for "mod_counter".
799
800 */
801
802 num_ser = 0;
803 num_mod = 0;
804 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
805 for (c = 0; c < raidPtr->numCol; c++) {
806 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
807 found=0;
808 for(i=0;i<num_ser;i++) {
809 if (ser_values[i] == ci_label->serial_number) {
810 ser_count[i]++;
811 found=1;
812 break;
813 }
814 }
815 if (!found) {
816 ser_values[num_ser] = ci_label->serial_number;
817 ser_count[num_ser] = 1;
818 num_ser++;
819 if (num_ser>2) {
820 fatal_error = 1;
821 break;
822 }
823 }
824 found=0;
825 for(i=0;i<num_mod;i++) {
826 if (mod_values[i] == ci_label->mod_counter) {
827 mod_count[i]++;
828 found=1;
829 break;
830 }
831 }
832 if (!found) {
833 mod_values[num_mod] = ci_label->mod_counter;
834 mod_count[num_mod] = 1;
835 num_mod++;
836 if (num_mod>2) {
837 fatal_error = 1;
838 break;
839 }
840 }
841 }
842 }
843 #if DEBUG
844 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
845 for(i=0;i<num_ser;i++) {
846 printf("%d %d\n", ser_values[i], ser_count[i]);
847 }
848 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
849 for(i=0;i<num_mod;i++) {
850 printf("%d %d\n", mod_values[i], mod_count[i]);
851 }
852 #endif
853 serial_number = ser_values[0];
854 if (num_ser == 2) {
855 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
856 /* Locate the maverick component */
857 if (ser_count[1] > ser_count[0]) {
858 serial_number = ser_values[1];
859 }
860 for (r = 0; r < raidPtr->numRow; r++) {
861 for (c = 0; c < raidPtr->numCol; c++) {
862 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
863 if (serial_number !=
864 ci_label->serial_number) {
865 hosed_row = r;
866 hosed_column = c;
867 break;
868 }
869 }
870 }
871 printf("Hosed component: %s\n",
872 &cfgPtr->devnames[hosed_row][hosed_column][0]);
873 if (!force) {
874 /* we'll fail this component, as if there are
875 other major errors, we arn't forcing things
876 and we'll abort the config anyways */
877 raidPtr->Disks[hosed_row][hosed_column].status
878 = rf_ds_failed;
879 raidPtr->numFailures++;
880 raidPtr->status[hosed_row] = rf_rs_degraded;
881 }
882 } else {
883 too_fatal = 1;
884 }
885 if (cfgPtr->parityConfig == '0') {
886 /* We've identified two different serial numbers.
887 RAID 0 can't cope with that, so we'll punt */
888 too_fatal = 1;
889 }
890
891 }
892
893 /* record the serial number for later. If we bail later, setting
894 this doesn't matter, otherwise we've got the best guess at the
895 correct serial number */
896 raidPtr->serial_number = serial_number;
897
898 mod_number = mod_values[0];
899 if (num_mod == 2) {
900 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
901 /* Locate the maverick component */
902 if (mod_count[1] > mod_count[0]) {
903 mod_number = mod_values[1];
904 } else if (mod_count[1] < mod_count[0]) {
905 mod_number = mod_values[0];
906 } else {
907 /* counts of different modification values
908 are the same. Assume greater value is
909 the correct one, all other things
910 considered */
911 if (mod_values[0] > mod_values[1]) {
912 mod_number = mod_values[0];
913 } else {
914 mod_number = mod_values[1];
915 }
916
917 }
918 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
919 for (c = 0; c < raidPtr->numCol; c++) {
920 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
921 if (mod_number !=
922 ci_label->mod_counter) {
923 if ( ( hosed_row == r ) &&
924 ( hosed_column == c )) {
925 /* same one. Can
926 deal with it. */
927 } else {
928 hosed_row = r;
929 hosed_column = c;
930 if (num_ser != 1) {
931 too_fatal = 1;
932 break;
933 }
934 }
935 }
936 }
937 }
938 printf("Hosed component: %s\n",
939 &cfgPtr->devnames[hosed_row][hosed_column][0]);
940 if (!force) {
941 /* we'll fail this component, as if there are
942 other major errors, we arn't forcing things
943 and we'll abort the config anyways */
944 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
945 raidPtr->Disks[hosed_row][hosed_column].status
946 = rf_ds_failed;
947 raidPtr->numFailures++;
948 raidPtr->status[hosed_row] = rf_rs_degraded;
949 }
950 }
951 } else {
952 too_fatal = 1;
953 }
954 if (cfgPtr->parityConfig == '0') {
955 /* We've identified two different mod counters.
956 RAID 0 can't cope with that, so we'll punt */
957 too_fatal = 1;
958 }
959 }
960
961 raidPtr->mod_counter = mod_number;
962
963 if (too_fatal) {
964 /* we've had both a serial number mismatch, and a mod_counter
965 mismatch -- and they involved two different components!!
966 Bail -- make things fail so that the user must force
967 the issue... */
968 hosed_row = -1;
969 hosed_column = -1;
970 }
971
972 if (num_ser > 2) {
973 printf("raid%d: Too many different serial numbers!\n",
974 raidPtr->raidid);
975 }
976
977 if (num_mod > 2) {
978 printf("raid%d: Too many different mod counters!\n",
979 raidPtr->raidid);
980 }
981
982 /* we start by assuming the parity will be good, and flee from
983 that notion at the slightest sign of trouble */
984
985 parity_good = RF_RAID_CLEAN;
986 for (r = 0; r < raidPtr->numRow; r++) {
987 for (c = 0; c < raidPtr->numCol; c++) {
988 dev_name = &cfgPtr->devnames[r][c][0];
989 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
990
991 if ((r == hosed_row) && (c == hosed_column)) {
992 printf("raid%d: Ignoring %s\n",
993 raidPtr->raidid, dev_name);
994 } else {
995 rf_print_label_status( raidPtr, r, c,
996 dev_name, ci_label );
997 if (rf_check_label_vitals( raidPtr, r, c,
998 dev_name, ci_label,
999 serial_number,
1000 mod_number )) {
1001 fatal_error = 1;
1002 }
1003 if (ci_label->clean != RF_RAID_CLEAN) {
1004 parity_good = RF_RAID_DIRTY;
1005 }
1006 }
1007 }
1008 }
1009 if (fatal_error) {
1010 parity_good = RF_RAID_DIRTY;
1011 }
1012
1013 /* we note the state of the parity */
1014 raidPtr->parity_good = parity_good;
1015
1016 return(fatal_error);
1017 }
1018
1019 int
1020 rf_add_hot_spare(raidPtr, sparePtr)
1021 RF_Raid_t *raidPtr;
1022 RF_SingleComponent_t *sparePtr;
1023 {
1024 RF_RaidDisk_t *disks;
1025 RF_DiskQueue_t *spareQueues;
1026 int ret;
1027 unsigned int bs;
1028 int spare_number;
1029
1030 if (raidPtr->numSpare >= RF_MAXSPARE) {
1031 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1032 return(EINVAL);
1033 }
1034
1035 RF_LOCK_MUTEX(raidPtr->mutex);
1036
1037 /* the beginning of the spares... */
1038 disks = &raidPtr->Disks[0][raidPtr->numCol];
1039
1040 spare_number = raidPtr->numSpare;
1041
1042 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1043 &disks[spare_number], 0,
1044 raidPtr->numCol + spare_number);
1045
1046 if (ret)
1047 goto fail;
1048 if (disks[spare_number].status != rf_ds_optimal) {
1049 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1050 sparePtr->component_name);
1051 ret=EINVAL;
1052 goto fail;
1053 } else {
1054 disks[spare_number].status = rf_ds_spare;
1055 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1056 disks[spare_number].devname,
1057 (long int) disks[spare_number].numBlocks,
1058 disks[spare_number].blockSize,
1059 (long int) disks[spare_number].numBlocks *
1060 disks[spare_number].blockSize / 1024 / 1024);
1061 }
1062
1063
1064 /* check sizes and block sizes on the spare disk */
1065 bs = 1 << raidPtr->logBytesPerSector;
1066 if (disks[spare_number].blockSize != bs) {
1067 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1068 ret = EINVAL;
1069 goto fail;
1070 }
1071 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1072 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1073 disks[spare_number].devname,
1074 disks[spare_number].blockSize,
1075 (long int) raidPtr->sectorsPerDisk);
1076 ret = EINVAL;
1077 goto fail;
1078 } else {
1079 if (disks[spare_number].numBlocks >
1080 raidPtr->sectorsPerDisk) {
1081 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1082 (long int) raidPtr->sectorsPerDisk);
1083
1084 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1085 }
1086 }
1087
1088 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1089 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1090 0, raidPtr->numCol + spare_number,
1091 raidPtr->qType,
1092 raidPtr->sectorsPerDisk,
1093 raidPtr->Disks[0][raidPtr->numCol +
1094 spare_number].dev,
1095 raidPtr->maxOutstanding,
1096 &raidPtr->shutdownList,
1097 raidPtr->cleanupList);
1098
1099
1100 raidPtr->numSpare++;
1101 RF_UNLOCK_MUTEX(raidPtr->mutex);
1102 return (0);
1103
1104 fail:
1105 RF_UNLOCK_MUTEX(raidPtr->mutex);
1106 return(ret);
1107 }
1108
1109 int
1110 rf_remove_hot_spare(raidPtr,sparePtr)
1111 RF_Raid_t *raidPtr;
1112 RF_SingleComponent_t *sparePtr;
1113 {
1114 int spare_number;
1115
1116
1117 if (raidPtr->numSpare==0) {
1118 printf("No spares to remove!\n");
1119 return(EINVAL);
1120 }
1121
1122 spare_number = sparePtr->column;
1123
1124 return(EINVAL); /* XXX not implemented yet */
1125 #if 0
1126 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1127 return(EINVAL);
1128 }
1129
1130 /* verify that this spare isn't in use... */
1131
1132
1133
1134
1135 /* it's gone.. */
1136
1137 raidPtr->numSpare--;
1138
1139 return(0);
1140 #endif
1141 }
1142
1143
1144 int
1145 rf_delete_component(raidPtr,component)
1146 RF_Raid_t *raidPtr;
1147 RF_SingleComponent_t *component;
1148 {
1149 RF_RaidDisk_t *disks;
1150
1151 if ((component->row < 0) ||
1152 (component->row >= raidPtr->numRow) ||
1153 (component->column < 0) ||
1154 (component->column >= raidPtr->numCol)) {
1155 return(EINVAL);
1156 }
1157
1158 disks = &raidPtr->Disks[component->row][component->column];
1159
1160 /* 1. This component must be marked as 'failed' */
1161
1162 return(EINVAL); /* Not implemented yet. */
1163 }
1164
1165 int
1166 rf_incorporate_hot_spare(raidPtr,component)
1167 RF_Raid_t *raidPtr;
1168 RF_SingleComponent_t *component;
1169 {
1170
1171 /* Issues here include how to 'move' this in if there is IO
1172 taking place (e.g. component queues and such) */
1173
1174 return(EINVAL); /* Not implemented yet. */
1175 }
1176