rf_disks.c revision 1.29 1 /* $NetBSD: rf_disks.c,v 1.29 2000/05/28 22:53:49 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
89 RF_ComponentLabel_t *);
90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *, int, int );
92
/*
 * Debug printf helpers: the message is printed only when rf_diskDebug is
 * non-zero.  Each macro expands to a single do { } while (0) statement so
 * that it behaves like an ordinary function call when used as the body of
 * an unbraced if/else (the bare `if (...) printf(...)' form would capture
 * a following `else').
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
95
96 /**************************************************************************
97 *
98 * initialize the disks comprising the array
99 *
100 * We want the spare disks to have regular row,col numbers so that we can
101 * easily substitute a spare for a failed disk. But, the driver code assumes
102 * throughout that the array contains numRow by numCol _non-spare_ disks, so
103 * it's not clear how to fit in the spares. This is an unfortunate holdover
104 * from raidSim. The quick and dirty fix is to make row zero bigger than the
105 * rest, and put all the spares in it. This probably needs to get changed
106 * eventually.
107 *
108 **************************************************************************/
109
110 int
111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
112 RF_ShutdownList_t **listp;
113 RF_Raid_t *raidPtr;
114 RF_Config_t *cfgPtr;
115 {
116 RF_RaidDisk_t **disks;
117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 RF_RowCol_t r, c;
119 int bs, ret;
120 unsigned i, count, foundone = 0, numFailuresThisRow;
121 int force;
122
123 force = cfgPtr->force;
124
125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
126 if (ret)
127 goto fail;
128
129 disks = raidPtr->Disks;
130
131 for (r = 0; r < raidPtr->numRow; r++) {
132 numFailuresThisRow = 0;
133 for (c = 0; c < raidPtr->numCol; c++) {
134 ret = rf_ConfigureDisk(raidPtr,
135 &cfgPtr->devnames[r][c][0],
136 &disks[r][c], r, c);
137
138 if (ret)
139 goto fail;
140
141 if (disks[r][c].status == rf_ds_optimal) {
142 raidread_component_label(
143 raidPtr->raid_cinfo[r][c].ci_dev,
144 raidPtr->raid_cinfo[r][c].ci_vp,
145 &raidPtr->raid_cinfo[r][c].ci_label);
146 }
147
148 if (disks[r][c].status != rf_ds_optimal) {
149 numFailuresThisRow++;
150 } else {
151 if (disks[r][c].numBlocks < min_numblks)
152 min_numblks = disks[r][c].numBlocks;
153 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
154 r, c, disks[r][c].devname,
155 (long int) disks[r][c].numBlocks,
156 disks[r][c].blockSize,
157 (long int) disks[r][c].numBlocks *
158 disks[r][c].blockSize / 1024 / 1024);
159 }
160 }
161 /* XXX fix for n-fault tolerant */
162 /* XXX this should probably check to see how many failures
163 we can handle for this configuration! */
164 if (numFailuresThisRow > 0)
165 raidPtr->status[r] = rf_rs_degraded;
166 }
167
168 /* all disks must be the same size & have the same block size, bs must
169 * be a power of 2 */
170 bs = 0;
171 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
172 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
173 if (disks[r][c].status == rf_ds_optimal) {
174 bs = disks[r][c].blockSize;
175 foundone = 1;
176 }
177 }
178 }
179 if (!foundone) {
180 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
181 ret = EINVAL;
182 goto fail;
183 }
184 for (count = 0, i = 1; i; i <<= 1)
185 if (bs & i)
186 count++;
187 if (count != 1) {
188 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
189 ret = EINVAL;
190 goto fail;
191 }
192
193 if (rf_CheckLabels( raidPtr, cfgPtr )) {
194 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
195 if (force != 0) {
196 printf("raid%d: Fatal errors being ignored.\n",
197 raidPtr->raidid);
198 } else {
199 ret = EINVAL;
200 goto fail;
201 }
202 }
203
204 for (r = 0; r < raidPtr->numRow; r++) {
205 for (c = 0; c < raidPtr->numCol; c++) {
206 if (disks[r][c].status == rf_ds_optimal) {
207 if (disks[r][c].blockSize != bs) {
208 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
209 ret = EINVAL;
210 goto fail;
211 }
212 if (disks[r][c].numBlocks != min_numblks) {
213 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
214 r, c, (int) min_numblks);
215 disks[r][c].numBlocks = min_numblks;
216 }
217 }
218 }
219 }
220
221 raidPtr->sectorsPerDisk = min_numblks;
222 raidPtr->logBytesPerSector = ffs(bs) - 1;
223 raidPtr->bytesPerSector = bs;
224 raidPtr->sectorMask = bs - 1;
225 return (0);
226
227 fail:
228
229 rf_UnconfigureVnodes( raidPtr );
230
231 return (ret);
232 }
233
234
235 /****************************************************************************
236 * set up the data structures describing the spare disks in the array
237 * recall from the above comment that the spare disk descriptors are stored
238 * in row zero, which is specially expanded to hold them.
239 ****************************************************************************/
240 int
241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
242 RF_ShutdownList_t ** listp;
243 RF_Raid_t * raidPtr;
244 RF_Config_t * cfgPtr;
245 {
246 int i, ret;
247 unsigned int bs;
248 RF_RaidDisk_t *disks;
249 int num_spares_done;
250
251 num_spares_done = 0;
252
253 /* The space for the spares should have already been allocated by
254 * ConfigureDisks() */
255
256 disks = &raidPtr->Disks[0][raidPtr->numCol];
257 for (i = 0; i < raidPtr->numSpare; i++) {
258 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
259 &disks[i], 0, raidPtr->numCol + i);
260 if (ret)
261 goto fail;
262 if (disks[i].status != rf_ds_optimal) {
263 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
264 &cfgPtr->spare_names[i][0]);
265 } else {
266 disks[i].status = rf_ds_spare; /* change status to
267 * spare */
268 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
269 disks[i].devname,
270 (long int) disks[i].numBlocks, disks[i].blockSize,
271 (long int) disks[i].numBlocks *
272 disks[i].blockSize / 1024 / 1024);
273 }
274 num_spares_done++;
275 }
276
277 /* check sizes and block sizes on spare disks */
278 bs = 1 << raidPtr->logBytesPerSector;
279 for (i = 0; i < raidPtr->numSpare; i++) {
280 if (disks[i].blockSize != bs) {
281 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
282 ret = EINVAL;
283 goto fail;
284 }
285 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
286 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
287 disks[i].devname, disks[i].blockSize,
288 (long int) raidPtr->sectorsPerDisk);
289 ret = EINVAL;
290 goto fail;
291 } else
292 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
293 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
294
295 disks[i].numBlocks = raidPtr->sectorsPerDisk;
296 }
297 }
298
299 return (0);
300
301 fail:
302
303 /* Release the hold on the main components. We've failed to allocate
304 * a spare, and since we're failing, we need to free things..
305
306 XXX failing to allocate a spare is *not* that big of a deal...
307 We *can* survive without it, if need be, esp. if we get hot
308 adding working.
309
310 If we don't fail out here, then we need a way to remove this spare...
311 that should be easier to do here than if we are "live"...
312
313 */
314
315 rf_UnconfigureVnodes( raidPtr );
316
317 return (ret);
318 }
319
320 static int
321 rf_AllocDiskStructures(raidPtr, cfgPtr)
322 RF_Raid_t *raidPtr;
323 RF_Config_t *cfgPtr;
324 {
325 RF_RaidDisk_t **disks;
326 int ret;
327 int r;
328
329 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
330 (RF_RaidDisk_t **), raidPtr->cleanupList);
331 if (disks == NULL) {
332 ret = ENOMEM;
333 goto fail;
334 }
335 raidPtr->Disks = disks;
336 /* get space for the device-specific stuff... */
337 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
338 sizeof(struct raidcinfo *), (struct raidcinfo **),
339 raidPtr->cleanupList);
340 if (raidPtr->raid_cinfo == NULL) {
341 ret = ENOMEM;
342 goto fail;
343 }
344
345 for (r = 0; r < raidPtr->numRow; r++) {
346 /* We allocate RF_MAXSPARE on the first row so that we
347 have room to do hot-swapping of spares */
348 RF_CallocAndAdd(disks[r], raidPtr->numCol
349 + ((r == 0) ? RF_MAXSPARE : 0),
350 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
351 raidPtr->cleanupList);
352 if (disks[r] == NULL) {
353 ret = ENOMEM;
354 goto fail;
355 }
356 /* get more space for device specific stuff.. */
357 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
358 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
359 sizeof(struct raidcinfo), (struct raidcinfo *),
360 raidPtr->cleanupList);
361 if (raidPtr->raid_cinfo[r] == NULL) {
362 ret = ENOMEM;
363 goto fail;
364 }
365 }
366 return(0);
367 fail:
368 rf_UnconfigureVnodes( raidPtr );
369
370 return(ret);
371 }
372
373
374 /* configure a single disk during auto-configuration at boot */
375 int
376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
377 RF_Raid_t *raidPtr;
378 RF_Config_t *cfgPtr;
379 RF_AutoConfig_t *auto_config;
380 {
381 RF_RaidDisk_t **disks;
382 RF_RaidDisk_t *diskPtr;
383 RF_RowCol_t r, c;
384 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
385 int bs, ret;
386 int numFailuresThisRow;
387 int force;
388 RF_AutoConfig_t *ac;
389 int parity_good;
390 int mod_counter;
391
392 #if DEBUG
393 printf("Starting autoconfiguration of RAID set...\n");
394 #endif
395 force = cfgPtr->force;
396
397 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
398 if (ret)
399 goto fail;
400
401 disks = raidPtr->Disks;
402
403 /* assume the parity will be fine.. */
404 parity_good = RF_RAID_CLEAN;
405
406 /* Check for mod_counters that are too low */
407 mod_counter = -1;
408 ac = auto_config;
409 while(ac!=NULL) {
410 if (ac->clabel->mod_counter > mod_counter) {
411 mod_counter = ac->clabel->mod_counter;
412 }
413 ac->flag = 0; /* clear the general purpose flag */
414 ac = ac->next;
415 }
416
417 if (mod_counter == -1) {
418 /* mod_counters were all negative!?!?!?
419 Ok, we can deal with that. */
420 #if 0
421 ac = auto_config;
422 while(ac!=NULL) {
423 if (ac->clabel->mod_counter > mod_counter) {
424 mod_counter = ac->clabel->mod_counter;
425 }
426 ac = ac->next;
427 }
428 #endif
429 }
430
431 for (r = 0; r < raidPtr->numRow; r++) {
432 numFailuresThisRow = 0;
433 for (c = 0; c < raidPtr->numCol; c++) {
434 diskPtr = &disks[r][c];
435
436 /* find this row/col in the autoconfig */
437 #if DEBUG
438 printf("Looking for %d,%d in autoconfig\n",r,c);
439 #endif
440 ac = auto_config;
441 while(ac!=NULL) {
442 if (ac->clabel==NULL) {
443 /* big-time bad news. */
444 goto fail;
445 }
446 if ((ac->clabel->row == r) &&
447 (ac->clabel->column == c) &&
448 (ac->clabel->mod_counter == mod_counter)) {
449 /* it's this one... */
450 /* flag it as 'used', so we don't
451 free it later. */
452 ac->flag = 1;
453 #if DEBUG
454 printf("Found: %s at %d,%d\n",
455 ac->devname,r,c);
456 #endif
457
458 break;
459 }
460 ac=ac->next;
461 }
462
463 if (ac==NULL) {
464 /* we didn't find an exact match with a
465 correct mod_counter above... can we
466 find one with an incorrect mod_counter
467 to use instead? (this one, if we find
468 it, will be marked as failed once the
469 set configures)
470 */
471
472 ac = auto_config;
473 while(ac!=NULL) {
474 if (ac->clabel==NULL) {
475 /* big-time bad news. */
476 goto fail;
477 }
478 if ((ac->clabel->row == r) &&
479 (ac->clabel->column == c)) {
480 /* it's this one...
481 flag it as 'used', so we
482 don't free it later. */
483 ac->flag = 1;
484 #if DEBUG
485 printf("Found(low mod_counter): %s at %d,%d\n",
486 ac->devname,r,c);
487 #endif
488
489 break;
490 }
491 ac=ac->next;
492 }
493 }
494
495
496
497 if (ac!=NULL) {
498 /* Found it. Configure it.. */
499 diskPtr->blockSize = ac->clabel->blockSize;
500 diskPtr->numBlocks = ac->clabel->numBlocks;
501 /* Note: rf_protectedSectors is already
502 factored into numBlocks here */
503 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
504 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
505
506 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
507 ac->clabel, sizeof(*ac->clabel));
508 sprintf(diskPtr->devname, "/dev/%s",
509 ac->devname);
510
511 /* note the fact that this component was
512 autoconfigured. You'll need this info
513 later. Trust me :) */
514 diskPtr->auto_configured = 1;
515 diskPtr->dev = ac->dev;
516
517 /*
518 * we allow the user to specify that
519 * only a fraction of the disks should
520 * be used this is just for debug: it
521 * speeds up the parity scan
522 */
523
524 diskPtr->numBlocks = diskPtr->numBlocks *
525 rf_sizePercentage / 100;
526
527 /* XXX these will get set multiple times,
528 but since we're autoconfiguring, they'd
529 better be always the same each time!
530 If not, this is the least of your worries */
531
532 bs = diskPtr->blockSize;
533 min_numblks = diskPtr->numBlocks;
534
535 /* this gets done multiple times, but that's
536 fine -- the serial number will be the same
537 for all components, guaranteed */
538 raidPtr->serial_number =
539 ac->clabel->serial_number;
540 /* check the last time the label
541 was modified */
542 if (ac->clabel->mod_counter !=
543 mod_counter) {
544 /* Even though we've filled in all
545 of the above, we don't trust
546 this component since it's
547 modification counter is not
548 in sync with the rest, and we really
549 consider it to be failed. */
550 disks[r][c].status = rf_ds_failed;
551 numFailuresThisRow++;
552 } else {
553 if (ac->clabel->clean !=
554 RF_RAID_CLEAN) {
555 parity_good = RF_RAID_DIRTY;
556 }
557 }
558 } else {
559 /* Didn't find it at all!!
560 Component must really be dead */
561 disks[r][c].status = rf_ds_failed;
562 numFailuresThisRow++;
563 }
564 }
565 /* XXX fix for n-fault tolerant */
566 /* XXX this should probably check to see how many failures
567 we can handle for this configuration! */
568 if (numFailuresThisRow > 0)
569 raidPtr->status[r] = rf_rs_degraded;
570 }
571
572 /* close the device for the ones that didn't get used */
573
574 ac = auto_config;
575 while(ac!=NULL) {
576 if (ac->flag == 0) {
577 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
578 vput(ac->vp);
579 ac->vp = NULL;
580 #if DEBUG
581 printf("Released %s from auto-config set.\n",
582 ac->devname);
583 #endif
584 }
585 ac = ac->next;
586 }
587
588 raidPtr->mod_counter = mod_counter;
589
590 /* note the state of the parity, if any */
591 raidPtr->parity_good = parity_good;
592 raidPtr->sectorsPerDisk = min_numblks;
593 raidPtr->logBytesPerSector = ffs(bs) - 1;
594 raidPtr->bytesPerSector = bs;
595 raidPtr->sectorMask = bs - 1;
596 return (0);
597
598 fail:
599
600 rf_UnconfigureVnodes( raidPtr );
601
602 return (ret);
603
604 }
605
606 /* configure a single disk in the array */
607 int
608 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
609 RF_Raid_t *raidPtr;
610 char *buf;
611 RF_RaidDisk_t *diskPtr;
612 RF_RowCol_t row;
613 RF_RowCol_t col;
614 {
615 char *p;
616 int retcode;
617
618 struct partinfo dpart;
619 struct vnode *vp;
620 struct vattr va;
621 struct proc *proc;
622 int error;
623
624 retcode = 0;
625 p = rf_find_non_white(buf);
626 if (p[strlen(p) - 1] == '\n') {
627 /* strip off the newline */
628 p[strlen(p) - 1] = '\0';
629 }
630 (void) strcpy(diskPtr->devname, p);
631
632 proc = raidPtr->engine_thread;
633
634 /* Let's start by claiming the component is fine and well... */
635 diskPtr->status = rf_ds_optimal;
636
637 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
638 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
639
640 error = raidlookup(diskPtr->devname, proc, &vp);
641 if (error) {
642 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
643 if (error == ENXIO) {
644 /* the component isn't there... must be dead :-( */
645 diskPtr->status = rf_ds_failed;
646 } else {
647 return (error);
648 }
649 }
650 if (diskPtr->status == rf_ds_optimal) {
651
652 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
653 return (error);
654 }
655 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
656 FREAD, proc->p_ucred, proc);
657 if (error) {
658 return (error);
659 }
660
661 diskPtr->blockSize = dpart.disklab->d_secsize;
662
663 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
664 diskPtr->partitionSize = dpart.part->p_size;
665
666 raidPtr->raid_cinfo[row][col].ci_vp = vp;
667 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
668
669 /* This component was not automatically configured */
670 diskPtr->auto_configured = 0;
671 diskPtr->dev = va.va_rdev;
672
673 /* we allow the user to specify that only a fraction of the
674 * disks should be used this is just for debug: it speeds up
675 * the parity scan */
676 diskPtr->numBlocks = diskPtr->numBlocks *
677 rf_sizePercentage / 100;
678 }
679 return (0);
680 }
681
682 static void
683 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
684 RF_Raid_t *raidPtr;
685 int row;
686 int column;
687 char *dev_name;
688 RF_ComponentLabel_t *ci_label;
689 {
690
691 printf("raid%d: Component %s being configured at row: %d col: %d\n",
692 raidPtr->raidid, dev_name, row, column );
693 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
694 ci_label->row, ci_label->column,
695 ci_label->num_rows, ci_label->num_columns);
696 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
697 ci_label->version, ci_label->serial_number,
698 ci_label->mod_counter);
699 printf(" Clean: %s Status: %d\n",
700 ci_label->clean ? "Yes" : "No", ci_label->status );
701 }
702
703 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
704 serial_number, mod_counter )
705 RF_Raid_t *raidPtr;
706 int row;
707 int column;
708 char *dev_name;
709 RF_ComponentLabel_t *ci_label;
710 int serial_number;
711 int mod_counter;
712 {
713 int fatal_error = 0;
714
715 if (serial_number != ci_label->serial_number) {
716 printf("%s has a different serial number: %d %d\n",
717 dev_name, serial_number, ci_label->serial_number);
718 fatal_error = 1;
719 }
720 if (mod_counter != ci_label->mod_counter) {
721 printf("%s has a different modfication count: %d %d\n",
722 dev_name, mod_counter, ci_label->mod_counter);
723 }
724
725 if (row != ci_label->row) {
726 printf("Row out of alignment for: %s\n", dev_name);
727 fatal_error = 1;
728 }
729 if (column != ci_label->column) {
730 printf("Column out of alignment for: %s\n", dev_name);
731 fatal_error = 1;
732 }
733 if (raidPtr->numRow != ci_label->num_rows) {
734 printf("Number of rows do not match for: %s\n", dev_name);
735 fatal_error = 1;
736 }
737 if (raidPtr->numCol != ci_label->num_columns) {
738 printf("Number of columns do not match for: %s\n", dev_name);
739 fatal_error = 1;
740 }
741 if (ci_label->clean == 0) {
742 /* it's not clean, but that's not fatal */
743 printf("%s is not clean!\n", dev_name);
744 }
745 return(fatal_error);
746 }
747
748
749 /*
750
751 rf_CheckLabels() - check all the component labels for consistency.
752 Return an error if there is anything major amiss.
753
754 */
755
756 int
757 rf_CheckLabels( raidPtr, cfgPtr )
758 RF_Raid_t *raidPtr;
759 RF_Config_t *cfgPtr;
760 {
761 int r,c;
762 char *dev_name;
763 RF_ComponentLabel_t *ci_label;
764 int serial_number = 0;
765 int mod_number = 0;
766 int fatal_error = 0;
767 int mod_values[4];
768 int mod_count[4];
769 int ser_values[4];
770 int ser_count[4];
771 int num_ser;
772 int num_mod;
773 int i;
774 int found;
775 int hosed_row;
776 int hosed_column;
777 int too_fatal;
778 int parity_good;
779 int force;
780
781 hosed_row = -1;
782 hosed_column = -1;
783 too_fatal = 0;
784 force = cfgPtr->force;
785
786 /*
787 We're going to try to be a little intelligent here. If one
788 component's label is bogus, and we can identify that it's the
789 *only* one that's gone, we'll mark it as "failed" and allow
790 the configuration to proceed. This will be the *only* case
791 that we'll proceed if there would be (otherwise) fatal errors.
792
793 Basically we simply keep a count of how many components had
794 what serial number. If all but one agree, we simply mark
795 the disagreeing component as being failed, and allow
796 things to come up "normally".
797
798 We do this first for serial numbers, and then for "mod_counter".
799
800 */
801
802 num_ser = 0;
803 num_mod = 0;
804 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
805 for (c = 0; c < raidPtr->numCol; c++) {
806 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
807 found=0;
808 for(i=0;i<num_ser;i++) {
809 if (ser_values[i] == ci_label->serial_number) {
810 ser_count[i]++;
811 found=1;
812 break;
813 }
814 }
815 if (!found) {
816 ser_values[num_ser] = ci_label->serial_number;
817 ser_count[num_ser] = 1;
818 num_ser++;
819 if (num_ser>2) {
820 fatal_error = 1;
821 break;
822 }
823 }
824 found=0;
825 for(i=0;i<num_mod;i++) {
826 if (mod_values[i] == ci_label->mod_counter) {
827 mod_count[i]++;
828 found=1;
829 break;
830 }
831 }
832 if (!found) {
833 mod_values[num_mod] = ci_label->mod_counter;
834 mod_count[num_mod] = 1;
835 num_mod++;
836 if (num_mod>2) {
837 fatal_error = 1;
838 break;
839 }
840 }
841 }
842 }
843 #if DEBUG
844 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
845 for(i=0;i<num_ser;i++) {
846 printf("%d %d\n", ser_values[i], ser_count[i]);
847 }
848 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
849 for(i=0;i<num_mod;i++) {
850 printf("%d %d\n", mod_values[i], mod_count[i]);
851 }
852 #endif
853 serial_number = ser_values[0];
854 if (num_ser == 2) {
855 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
856 /* Locate the maverick component */
857 if (ser_count[1] > ser_count[0]) {
858 serial_number = ser_values[1];
859 }
860 for (r = 0; r < raidPtr->numRow; r++) {
861 for (c = 0; c < raidPtr->numCol; c++) {
862 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
863 if (serial_number !=
864 ci_label->serial_number) {
865 hosed_row = r;
866 hosed_column = c;
867 break;
868 }
869 }
870 }
871 printf("Hosed component: %s\n",
872 &cfgPtr->devnames[hosed_row][hosed_column][0]);
873 if (!force) {
874 /* we'll fail this component, as if there are
875 other major errors, we arn't forcing things
876 and we'll abort the config anyways */
877 raidPtr->Disks[hosed_row][hosed_column].status
878 = rf_ds_failed;
879 raidPtr->numFailures++;
880 raidPtr->status[hosed_row] = rf_rs_degraded;
881 }
882 } else {
883 too_fatal = 1;
884 }
885 if (cfgPtr->parityConfig == '0') {
886 /* We've identified two different serial numbers.
887 RAID 0 can't cope with that, so we'll punt */
888 too_fatal = 1;
889 }
890
891 }
892
893 /* record the serial number for later. If we bail later, setting
894 this doesn't matter, otherwise we've got the best guess at the
895 correct serial number */
896 raidPtr->serial_number = serial_number;
897
898 mod_number = mod_values[0];
899 if (num_mod == 2) {
900 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
901 /* Locate the maverick component */
902 if (mod_count[1] > mod_count[0]) {
903 mod_number = mod_values[1];
904 } else if (mod_count[1] < mod_count[0]) {
905 mod_number = mod_values[0];
906 } else {
907 /* counts of different modification values
908 are the same. Assume greater value is
909 the correct one, all other things
910 considered */
911 if (mod_values[0] > mod_values[1]) {
912 mod_number = mod_values[0];
913 } else {
914 mod_number = mod_values[1];
915 }
916
917 }
918 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
919 for (c = 0; c < raidPtr->numCol; c++) {
920 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
921 if (mod_number !=
922 ci_label->mod_counter) {
923 if ( ( hosed_row == r ) &&
924 ( hosed_column == c )) {
925 /* same one. Can
926 deal with it. */
927 } else {
928 hosed_row = r;
929 hosed_column = c;
930 if (num_ser != 1) {
931 too_fatal = 1;
932 break;
933 }
934 }
935 }
936 }
937 }
938 printf("Hosed component: %s\n",
939 &cfgPtr->devnames[hosed_row][hosed_column][0]);
940 if (!force) {
941 /* we'll fail this component, as if there are
942 other major errors, we arn't forcing things
943 and we'll abort the config anyways */
944 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
945 raidPtr->Disks[hosed_row][hosed_column].status
946 = rf_ds_failed;
947 raidPtr->numFailures++;
948 raidPtr->status[hosed_row] = rf_rs_degraded;
949 }
950 }
951 } else {
952 too_fatal = 1;
953 }
954 if (cfgPtr->parityConfig == '0') {
955 /* We've identified two different mod counters.
956 RAID 0 can't cope with that, so we'll punt */
957 too_fatal = 1;
958 }
959 }
960
961 raidPtr->mod_counter = mod_number;
962
963 if (too_fatal) {
964 /* we've had both a serial number mismatch, and a mod_counter
965 mismatch -- and they involved two different components!!
966 Bail -- make things fail so that the user must force
967 the issue... */
968 hosed_row = -1;
969 hosed_column = -1;
970 }
971
972 if (num_ser > 2) {
973 printf("raid%d: Too many different serial numbers!\n",
974 raidPtr->raidid);
975 }
976
977 if (num_mod > 2) {
978 printf("raid%d: Too many different mod counters!\n",
979 raidPtr->raidid);
980 }
981
982 /* we start by assuming the parity will be good, and flee from
983 that notion at the slightest sign of trouble */
984
985 parity_good = RF_RAID_CLEAN;
986 for (r = 0; r < raidPtr->numRow; r++) {
987 for (c = 0; c < raidPtr->numCol; c++) {
988 dev_name = &cfgPtr->devnames[r][c][0];
989 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
990
991 if ((r == hosed_row) && (c == hosed_column)) {
992 printf("raid%d: Ignoring %s\n",
993 raidPtr->raidid, dev_name);
994 } else {
995 rf_print_label_status( raidPtr, r, c,
996 dev_name, ci_label );
997 if (rf_check_label_vitals( raidPtr, r, c,
998 dev_name, ci_label,
999 serial_number,
1000 mod_number )) {
1001 fatal_error = 1;
1002 }
1003 if (ci_label->clean != RF_RAID_CLEAN) {
1004 parity_good = RF_RAID_DIRTY;
1005 }
1006 }
1007 }
1008 }
1009 if (fatal_error) {
1010 parity_good = RF_RAID_DIRTY;
1011 }
1012
1013 /* we note the state of the parity */
1014 raidPtr->parity_good = parity_good;
1015
1016 return(fatal_error);
1017 }
1018
1019 int
1020 rf_add_hot_spare(raidPtr, sparePtr)
1021 RF_Raid_t *raidPtr;
1022 RF_SingleComponent_t *sparePtr;
1023 {
1024 RF_RaidDisk_t *disks;
1025 RF_DiskQueue_t *spareQueues;
1026 int ret;
1027 unsigned int bs;
1028 int spare_number;
1029
1030 #if 0
1031 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
1032 printf("Num col: %d\n",raidPtr->numCol);
1033 #endif
1034 if (raidPtr->numSpare >= RF_MAXSPARE) {
1035 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1036 return(EINVAL);
1037 }
1038
1039 RF_LOCK_MUTEX(raidPtr->mutex);
1040
1041 /* the beginning of the spares... */
1042 disks = &raidPtr->Disks[0][raidPtr->numCol];
1043
1044 spare_number = raidPtr->numSpare;
1045
1046 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1047 &disks[spare_number], 0,
1048 raidPtr->numCol + spare_number);
1049
1050 if (ret)
1051 goto fail;
1052 if (disks[spare_number].status != rf_ds_optimal) {
1053 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1054 sparePtr->component_name);
1055 ret=EINVAL;
1056 goto fail;
1057 } else {
1058 disks[spare_number].status = rf_ds_spare;
1059 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1060 disks[spare_number].devname,
1061 (long int) disks[spare_number].numBlocks,
1062 disks[spare_number].blockSize,
1063 (long int) disks[spare_number].numBlocks *
1064 disks[spare_number].blockSize / 1024 / 1024);
1065 }
1066
1067
1068 /* check sizes and block sizes on the spare disk */
1069 bs = 1 << raidPtr->logBytesPerSector;
1070 if (disks[spare_number].blockSize != bs) {
1071 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1072 ret = EINVAL;
1073 goto fail;
1074 }
1075 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1076 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1077 disks[spare_number].devname,
1078 disks[spare_number].blockSize,
1079 (long int) raidPtr->sectorsPerDisk);
1080 ret = EINVAL;
1081 goto fail;
1082 } else {
1083 if (disks[spare_number].numBlocks >
1084 raidPtr->sectorsPerDisk) {
1085 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1086 (long int) raidPtr->sectorsPerDisk);
1087
1088 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1089 }
1090 }
1091
1092 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1093 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1094 0, raidPtr->numCol + spare_number,
1095 raidPtr->qType,
1096 raidPtr->sectorsPerDisk,
1097 raidPtr->Disks[0][raidPtr->numCol +
1098 spare_number].dev,
1099 raidPtr->maxOutstanding,
1100 &raidPtr->shutdownList,
1101 raidPtr->cleanupList);
1102
1103
1104 raidPtr->numSpare++;
1105 RF_UNLOCK_MUTEX(raidPtr->mutex);
1106 return (0);
1107
1108 fail:
1109 RF_UNLOCK_MUTEX(raidPtr->mutex);
1110 return(ret);
1111 }
1112
1113 int
1114 rf_remove_hot_spare(raidPtr,sparePtr)
1115 RF_Raid_t *raidPtr;
1116 RF_SingleComponent_t *sparePtr;
1117 {
1118 int spare_number;
1119
1120
1121 if (raidPtr->numSpare==0) {
1122 printf("No spares to remove!\n");
1123 return(EINVAL);
1124 }
1125
1126 spare_number = sparePtr->column;
1127
1128 return(EINVAL); /* XXX not implemented yet */
1129 #if 0
1130 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1131 return(EINVAL);
1132 }
1133
1134 /* verify that this spare isn't in use... */
1135
1136
1137
1138
1139 /* it's gone.. */
1140
1141 raidPtr->numSpare--;
1142
1143 return(0);
1144 #endif
1145 }
1146
1147
1148 int
1149 rf_delete_component(raidPtr,component)
1150 RF_Raid_t *raidPtr;
1151 RF_SingleComponent_t *component;
1152 {
1153 RF_RaidDisk_t *disks;
1154
1155 if ((component->row < 0) ||
1156 (component->row >= raidPtr->numRow) ||
1157 (component->column < 0) ||
1158 (component->column >= raidPtr->numCol)) {
1159 return(EINVAL);
1160 }
1161
1162 disks = &raidPtr->Disks[component->row][component->column];
1163
1164 /* 1. This component must be marked as 'failed' */
1165
1166 return(EINVAL); /* Not implemented yet. */
1167 }
1168
1169 int
1170 rf_incorporate_hot_spare(raidPtr,component)
1171 RF_Raid_t *raidPtr;
1172 RF_SingleComponent_t *component;
1173 {
1174
1175 /* Issues here include how to 'move' this in if there is IO
1176 taking place (e.g. component queues and such) */
1177
1178 return(EINVAL); /* Not implemented yet. */
1179 }
1180