rf_disks.c revision 1.22 1 /* $NetBSD: rf_disks.c,v 1.22 2000/03/03 03:47:17 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each macro is wrapped in do { } while (0) so that it expands
 * to exactly one statement and cannot capture an "else" that follows it.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94
95 /**************************************************************************
96 *
97 * initialize the disks comprising the array
98 *
99 * We want the spare disks to have regular row,col numbers so that we can
100 * easily substitue a spare for a failed disk. But, the driver code assumes
101 * throughout that the array contains numRow by numCol _non-spare_ disks, so
102 * it's not clear how to fit in the spares. This is an unfortunate holdover
103 * from raidSim. The quick and dirty fix is to make row zero bigger than the
104 * rest, and put all the spares in it. This probably needs to get changed
105 * eventually.
106 *
107 **************************************************************************/
108
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 RF_ShutdownList_t **listp;
112 RF_Raid_t *raidPtr;
113 RF_Config_t *cfgPtr;
114 {
115 RF_RaidDisk_t **disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t r, c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
125 if (ret)
126 goto fail;
127
128 disks = raidPtr->Disks;
129
130 for (r = 0; r < raidPtr->numRow; r++) {
131 numFailuresThisRow = 0;
132 for (c = 0; c < raidPtr->numCol; c++) {
133 ret = rf_ConfigureDisk(raidPtr,
134 &cfgPtr->devnames[r][c][0],
135 &disks[r][c], r, c);
136
137 if (ret)
138 goto fail;
139
140 if (disks[r][c].status == rf_ds_optimal) {
141 raidread_component_label(
142 raidPtr->raid_cinfo[r][c].ci_dev,
143 raidPtr->raid_cinfo[r][c].ci_vp,
144 &raidPtr->raid_cinfo[r][c].ci_label);
145 }
146
147 if (disks[r][c].status != rf_ds_optimal) {
148 numFailuresThisRow++;
149 } else {
150 if (disks[r][c].numBlocks < min_numblks)
151 min_numblks = disks[r][c].numBlocks;
152 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
153 r, c, disks[r][c].devname,
154 (long int) disks[r][c].numBlocks,
155 disks[r][c].blockSize,
156 (long int) disks[r][c].numBlocks *
157 disks[r][c].blockSize / 1024 / 1024);
158 }
159 }
160 /* XXX fix for n-fault tolerant */
161 /* XXX this should probably check to see how many failures
162 we can handle for this configuration! */
163 if (numFailuresThisRow > 0)
164 raidPtr->status[r] = rf_rs_degraded;
165 }
166
167 /* all disks must be the same size & have the same block size, bs must
168 * be a power of 2 */
169 bs = 0;
170 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
171 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
172 if (disks[r][c].status == rf_ds_optimal) {
173 bs = disks[r][c].blockSize;
174 foundone = 1;
175 }
176 }
177 }
178 if (!foundone) {
179 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
180 ret = EINVAL;
181 goto fail;
182 }
183 for (count = 0, i = 1; i; i <<= 1)
184 if (bs & i)
185 count++;
186 if (count != 1) {
187 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
188 ret = EINVAL;
189 goto fail;
190 }
191
192 if (rf_CheckLabels( raidPtr, cfgPtr )) {
193 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
194 if (force != 0) {
195 printf("raid%d: Fatal errors being ignored.\n",
196 raidPtr->raidid);
197 } else {
198 ret = EINVAL;
199 goto fail;
200 }
201 }
202
203 for (r = 0; r < raidPtr->numRow; r++) {
204 for (c = 0; c < raidPtr->numCol; c++) {
205 if (disks[r][c].status == rf_ds_optimal) {
206 if (disks[r][c].blockSize != bs) {
207 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
208 ret = EINVAL;
209 goto fail;
210 }
211 if (disks[r][c].numBlocks != min_numblks) {
212 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
213 r, c, (int) min_numblks);
214 disks[r][c].numBlocks = min_numblks;
215 }
216 }
217 }
218 }
219
220 raidPtr->sectorsPerDisk = min_numblks;
221 raidPtr->logBytesPerSector = ffs(bs) - 1;
222 raidPtr->bytesPerSector = bs;
223 raidPtr->sectorMask = bs - 1;
224 return (0);
225
226 fail:
227
228 rf_UnconfigureVnodes( raidPtr );
229
230 return (ret);
231 }
232
233
234 /****************************************************************************
235 * set up the data structures describing the spare disks in the array
236 * recall from the above comment that the spare disk descriptors are stored
237 * in row zero, which is specially expanded to hold them.
238 ****************************************************************************/
239 int
240 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
241 RF_ShutdownList_t ** listp;
242 RF_Raid_t * raidPtr;
243 RF_Config_t * cfgPtr;
244 {
245 int i, ret;
246 unsigned int bs;
247 RF_RaidDisk_t *disks;
248 int num_spares_done;
249
250 num_spares_done = 0;
251
252 /* The space for the spares should have already been allocated by
253 * ConfigureDisks() */
254
255 disks = &raidPtr->Disks[0][raidPtr->numCol];
256 for (i = 0; i < raidPtr->numSpare; i++) {
257 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
258 &disks[i], 0, raidPtr->numCol + i);
259 if (ret)
260 goto fail;
261 if (disks[i].status != rf_ds_optimal) {
262 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
263 &cfgPtr->spare_names[i][0]);
264 } else {
265 disks[i].status = rf_ds_spare; /* change status to
266 * spare */
267 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
268 disks[i].devname,
269 (long int) disks[i].numBlocks, disks[i].blockSize,
270 (long int) disks[i].numBlocks *
271 disks[i].blockSize / 1024 / 1024);
272 }
273 num_spares_done++;
274 }
275
276 /* check sizes and block sizes on spare disks */
277 bs = 1 << raidPtr->logBytesPerSector;
278 for (i = 0; i < raidPtr->numSpare; i++) {
279 if (disks[i].blockSize != bs) {
280 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
281 ret = EINVAL;
282 goto fail;
283 }
284 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
285 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
286 disks[i].devname, disks[i].blockSize,
287 (long int) raidPtr->sectorsPerDisk);
288 ret = EINVAL;
289 goto fail;
290 } else
291 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
292 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
293
294 disks[i].numBlocks = raidPtr->sectorsPerDisk;
295 }
296 }
297
298 return (0);
299
300 fail:
301
302 /* Release the hold on the main components. We've failed to allocate
303 * a spare, and since we're failing, we need to free things..
304
305 XXX failing to allocate a spare is *not* that big of a deal...
306 We *can* survive without it, if need be, esp. if we get hot
307 adding working.
308
309 If we don't fail out here, then we need a way to remove this spare...
310 that should be easier to do here than if we are "live"...
311
312 */
313
314 rf_UnconfigureVnodes( raidPtr );
315
316 return (ret);
317 }
318
319 static int
320 rf_AllocDiskStructures(raidPtr, cfgPtr)
321 RF_Raid_t *raidPtr;
322 RF_Config_t *cfgPtr;
323 {
324 RF_RaidDisk_t **disks;
325 int ret;
326 int r;
327
328 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
329 (RF_RaidDisk_t **), raidPtr->cleanupList);
330 if (disks == NULL) {
331 ret = ENOMEM;
332 goto fail;
333 }
334 raidPtr->Disks = disks;
335 /* get space for the device-specific stuff... */
336 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
337 sizeof(struct raidcinfo *), (struct raidcinfo **),
338 raidPtr->cleanupList);
339 if (raidPtr->raid_cinfo == NULL) {
340 ret = ENOMEM;
341 goto fail;
342 }
343
344 for (r = 0; r < raidPtr->numRow; r++) {
345 /* We allocate RF_MAXSPARE on the first row so that we
346 have room to do hot-swapping of spares */
347 RF_CallocAndAdd(disks[r], raidPtr->numCol
348 + ((r == 0) ? RF_MAXSPARE : 0),
349 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
350 raidPtr->cleanupList);
351 if (disks[r] == NULL) {
352 ret = ENOMEM;
353 goto fail;
354 }
355 /* get more space for device specific stuff.. */
356 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
357 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
358 sizeof(struct raidcinfo), (struct raidcinfo *),
359 raidPtr->cleanupList);
360 if (raidPtr->raid_cinfo[r] == NULL) {
361 ret = ENOMEM;
362 goto fail;
363 }
364 }
365 return(0);
366 fail:
367 rf_UnconfigureVnodes( raidPtr );
368
369 return(ret);
370 }
371
372
373 /* configure a single disk during auto-configuration at boot */
374 int
375 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
376 RF_Raid_t *raidPtr;
377 RF_Config_t *cfgPtr;
378 RF_AutoConfig_t *auto_config;
379 {
380 RF_RaidDisk_t **disks;
381 RF_RaidDisk_t *diskPtr;
382 RF_RowCol_t r, c;
383 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
384 int bs, ret;
385 int numFailuresThisRow;
386 int force;
387 RF_AutoConfig_t *ac;
388 int parity_good;
389 int mod_counter;
390
391 #if DEBUG
392 printf("Starting autoconfiguration of RAID set...\n");
393 #endif
394 force = cfgPtr->force;
395
396 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
397 if (ret)
398 goto fail;
399
400 disks = raidPtr->Disks;
401
402 /* assume the parity will be fine.. */
403 parity_good = RF_RAID_CLEAN;
404
405 /* Check for mod_counters that are too low */
406 mod_counter = -1;
407 ac = auto_config;
408 while(ac!=NULL) {
409 if (ac->clabel->mod_counter > mod_counter) {
410 mod_counter = ac->clabel->mod_counter;
411 }
412 ac = ac->next;
413 }
414 if (mod_counter == -1) {
415 /* mod_counters were all negative!?!?!?
416 Ok, we can deal with that. */
417 #if 0
418 ac = auto_config;
419 while(ac!=NULL) {
420 if (ac->clabel->mod_counter > mod_counter) {
421 mod_counter = ac->clabel->mod_counter;
422 }
423 ac = ac->next;
424 }
425 #endif
426 }
427
428 for (r = 0; r < raidPtr->numRow; r++) {
429 numFailuresThisRow = 0;
430 for (c = 0; c < raidPtr->numCol; c++) {
431 diskPtr = &disks[r][c];
432
433 /* find this row/col in the autoconfig */
434 #if DEBUG
435 printf("Looking for %d,%d in autoconfig\n",r,c);
436 #endif
437 ac = auto_config;
438 while(ac!=NULL) {
439 if (ac->clabel==NULL) {
440 /* big-time bad news. */
441 goto fail;
442 }
443 if ((ac->clabel->row == r) &&
444 (ac->clabel->column == c)) {
445 /* it's this one... */
446 #if DEBUG
447 printf("Found: %s at %d,%d\n",
448 ac->devname,r,c);
449 #endif
450
451 break;
452 }
453 ac=ac->next;
454 }
455
456 if (ac!=NULL) {
457 /* Found it. Configure it.. */
458 diskPtr->blockSize = ac->clabel->blockSize;
459 diskPtr->numBlocks = ac->clabel->numBlocks;
460 /* Note: rf_protectedSectors is already
461 factored into numBlocks here */
462 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
463 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
464
465 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
466 ac->clabel, sizeof(*ac->clabel));
467 sprintf(diskPtr->devname, "/dev/%s",
468 ac->devname);
469
470 /* note the fact that this component was
471 autoconfigured. You'll need this info
472 later. Trust me :) */
473 diskPtr->auto_configured = 1;
474 diskPtr->dev = ac->dev;
475
476 /*
477 * we allow the user to specify that
478 * only a fraction of the disks should
479 * be used this is just for debug: it
480 * speeds up the parity scan
481 */
482
483 diskPtr->numBlocks = diskPtr->numBlocks *
484 rf_sizePercentage / 100;
485
486 /* XXX these will get set multiple times,
487 but since we're autoconfiguring, they'd
488 better be always the same each time!
489 If not, this is the least of your worries */
490
491 bs = diskPtr->blockSize;
492 min_numblks = diskPtr->numBlocks;
493
494 /* this gets done multiple times, but that's
495 fine -- the serial number will be the same
496 for all components, guaranteed */
497 raidPtr->serial_number =
498 ac->clabel->serial_number;
499 /* check the last time the label
500 was modified */
501 if (ac->clabel->mod_counter !=
502 mod_counter) {
503 /* Even though we've filled in all
504 of the above, we don't trust
505 this component since it's
506 modification counter is not
507 in sync with the rest, and we really
508 consider it to be failed. */
509 disks[r][c].status = rf_ds_failed;
510 numFailuresThisRow++;
511 } else {
512 if (ac->clabel->clean !=
513 RF_RAID_CLEAN) {
514 parity_good = RF_RAID_DIRTY;
515 }
516 }
517 } else {
518 /* Didn't find it at all!!
519 Component must really be dead */
520 disks[r][c].status = rf_ds_failed;
521 numFailuresThisRow++;
522 }
523 }
524 /* XXX fix for n-fault tolerant */
525 /* XXX this should probably check to see how many failures
526 we can handle for this configuration! */
527 if (numFailuresThisRow > 0)
528 raidPtr->status[r] = rf_rs_degraded;
529 }
530
531 raidPtr->mod_counter = mod_counter;
532
533 /* note the state of the parity, if any */
534 raidPtr->parity_good = parity_good;
535 raidPtr->sectorsPerDisk = min_numblks;
536 raidPtr->logBytesPerSector = ffs(bs) - 1;
537 raidPtr->bytesPerSector = bs;
538 raidPtr->sectorMask = bs - 1;
539 return (0);
540
541 fail:
542
543 rf_UnconfigureVnodes( raidPtr );
544
545 return (ret);
546
547 }
548
549 /* configure a single disk in the array */
550 int
551 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
552 RF_Raid_t *raidPtr;
553 char *buf;
554 RF_RaidDisk_t *diskPtr;
555 RF_RowCol_t row;
556 RF_RowCol_t col;
557 {
558 char *p;
559 int retcode;
560
561 struct partinfo dpart;
562 struct vnode *vp;
563 struct vattr va;
564 struct proc *proc;
565 int error;
566
567 retcode = 0;
568 p = rf_find_non_white(buf);
569 if (p[strlen(p) - 1] == '\n') {
570 /* strip off the newline */
571 p[strlen(p) - 1] = '\0';
572 }
573 (void) strcpy(diskPtr->devname, p);
574
575 proc = raidPtr->engine_thread;
576
577 /* Let's start by claiming the component is fine and well... */
578 diskPtr->status = rf_ds_optimal;
579
580 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
581 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
582
583 error = raidlookup(diskPtr->devname, proc, &vp);
584 if (error) {
585 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
586 if (error == ENXIO) {
587 /* the component isn't there... must be dead :-( */
588 diskPtr->status = rf_ds_failed;
589 } else {
590 return (error);
591 }
592 }
593 if (diskPtr->status == rf_ds_optimal) {
594
595 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
596 return (error);
597 }
598 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
599 FREAD, proc->p_ucred, proc);
600 if (error) {
601 return (error);
602 }
603
604 diskPtr->blockSize = dpart.disklab->d_secsize;
605
606 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
607 diskPtr->partitionSize = dpart.part->p_size;
608
609 raidPtr->raid_cinfo[row][col].ci_vp = vp;
610 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
611
612 /* This component was not automatically configured */
613 diskPtr->auto_configured = 0;
614 diskPtr->dev = va.va_rdev;
615
616 /* we allow the user to specify that only a fraction of the
617 * disks should be used this is just for debug: it speeds up
618 * the parity scan */
619 diskPtr->numBlocks = diskPtr->numBlocks *
620 rf_sizePercentage / 100;
621 }
622 return (0);
623 }
624
625 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
626 RF_ComponentLabel_t *);
627
628 static void
629 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
630 RF_Raid_t *raidPtr;
631 int row;
632 int column;
633 char *dev_name;
634 RF_ComponentLabel_t *ci_label;
635 {
636
637 printf("raid%d: Component %s being configured at row: %d col: %d\n",
638 raidPtr->raidid, dev_name, row, column );
639 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
640 ci_label->row, ci_label->column,
641 ci_label->num_rows, ci_label->num_columns);
642 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
643 ci_label->version, ci_label->serial_number,
644 ci_label->mod_counter);
645 printf(" Clean: %s Status: %d\n",
646 ci_label->clean ? "Yes" : "No", ci_label->status );
647 }
648
649 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
650 RF_ComponentLabel_t *, int, int );
651 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
652 serial_number, mod_counter )
653 RF_Raid_t *raidPtr;
654 int row;
655 int column;
656 char *dev_name;
657 RF_ComponentLabel_t *ci_label;
658 int serial_number;
659 int mod_counter;
660 {
661 int fatal_error = 0;
662
663 if (serial_number != ci_label->serial_number) {
664 printf("%s has a different serial number: %d %d\n",
665 dev_name, serial_number, ci_label->serial_number);
666 fatal_error = 1;
667 }
668 if (mod_counter != ci_label->mod_counter) {
669 printf("%s has a different modfication count: %d %d\n",
670 dev_name, mod_counter, ci_label->mod_counter);
671 }
672
673 if (row != ci_label->row) {
674 printf("Row out of alignment for: %s\n", dev_name);
675 fatal_error = 1;
676 }
677 if (column != ci_label->column) {
678 printf("Column out of alignment for: %s\n", dev_name);
679 fatal_error = 1;
680 }
681 if (raidPtr->numRow != ci_label->num_rows) {
682 printf("Number of rows do not match for: %s\n", dev_name);
683 fatal_error = 1;
684 }
685 if (raidPtr->numCol != ci_label->num_columns) {
686 printf("Number of columns do not match for: %s\n", dev_name);
687 fatal_error = 1;
688 }
689 if (ci_label->clean == 0) {
690 /* it's not clean, but that's not fatal */
691 printf("%s is not clean!\n", dev_name);
692 }
693 return(fatal_error);
694 }
695
696
697 /*
698
699 rf_CheckLabels() - check all the component labels for consistency.
700 Return an error if there is anything major amiss.
701
702 */
703
704 int
705 rf_CheckLabels( raidPtr, cfgPtr )
706 RF_Raid_t *raidPtr;
707 RF_Config_t *cfgPtr;
708 {
709 int r,c;
710 char *dev_name;
711 RF_ComponentLabel_t *ci_label;
712 int serial_number = 0;
713 int mod_number = 0;
714 int fatal_error = 0;
715 int mod_values[4];
716 int mod_count[4];
717 int ser_values[4];
718 int ser_count[4];
719 int num_ser;
720 int num_mod;
721 int i;
722 int found;
723 int hosed_row;
724 int hosed_column;
725 int too_fatal;
726 int parity_good;
727 int force;
728
729 hosed_row = -1;
730 hosed_column = -1;
731 too_fatal = 0;
732 force = cfgPtr->force;
733
734 /*
735 We're going to try to be a little intelligent here. If one
736 component's label is bogus, and we can identify that it's the
737 *only* one that's gone, we'll mark it as "failed" and allow
738 the configuration to proceed. This will be the *only* case
739 that we'll proceed if there would be (otherwise) fatal errors.
740
741 Basically we simply keep a count of how many components had
742 what serial number. If all but one agree, we simply mark
743 the disagreeing component as being failed, and allow
744 things to come up "normally".
745
746 We do this first for serial numbers, and then for "mod_counter".
747
748 */
749
750 num_ser = 0;
751 num_mod = 0;
752 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
753 for (c = 0; c < raidPtr->numCol; c++) {
754 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
755 found=0;
756 for(i=0;i<num_ser;i++) {
757 if (ser_values[i] == ci_label->serial_number) {
758 ser_count[i]++;
759 found=1;
760 break;
761 }
762 }
763 if (!found) {
764 ser_values[num_ser] = ci_label->serial_number;
765 ser_count[num_ser] = 1;
766 num_ser++;
767 if (num_ser>2) {
768 fatal_error = 1;
769 break;
770 }
771 }
772 found=0;
773 for(i=0;i<num_mod;i++) {
774 if (mod_values[i] == ci_label->mod_counter) {
775 mod_count[i]++;
776 found=1;
777 break;
778 }
779 }
780 if (!found) {
781 mod_values[num_mod] = ci_label->mod_counter;
782 mod_count[num_mod] = 1;
783 num_mod++;
784 if (num_mod>2) {
785 fatal_error = 1;
786 break;
787 }
788 }
789 }
790 }
791 #if DEBUG
792 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
793 for(i=0;i<num_ser;i++) {
794 printf("%d %d\n", ser_values[i], ser_count[i]);
795 }
796 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
797 for(i=0;i<num_mod;i++) {
798 printf("%d %d\n", mod_values[i], mod_count[i]);
799 }
800 #endif
801 serial_number = ser_values[0];
802 if (num_ser == 2) {
803 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
804 /* Locate the maverick component */
805 if (ser_count[1] > ser_count[0]) {
806 serial_number = ser_values[1];
807 }
808 for (r = 0; r < raidPtr->numRow; r++) {
809 for (c = 0; c < raidPtr->numCol; c++) {
810 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
811 if (serial_number !=
812 ci_label->serial_number) {
813 hosed_row = r;
814 hosed_column = c;
815 break;
816 }
817 }
818 }
819 printf("Hosed component: %s\n",
820 &cfgPtr->devnames[hosed_row][hosed_column][0]);
821 if (!force) {
822 /* we'll fail this component, as if there are
823 other major errors, we arn't forcing things
824 and we'll abort the config anyways */
825 raidPtr->Disks[hosed_row][hosed_column].status
826 = rf_ds_failed;
827 raidPtr->numFailures++;
828 raidPtr->status[hosed_row] = rf_rs_degraded;
829 }
830 } else {
831 too_fatal = 1;
832 }
833 if (cfgPtr->parityConfig == '0') {
834 /* We've identified two different serial numbers.
835 RAID 0 can't cope with that, so we'll punt */
836 too_fatal = 1;
837 }
838
839 }
840
841 /* record the serial number for later. If we bail later, setting
842 this doesn't matter, otherwise we've got the best guess at the
843 correct serial number */
844 raidPtr->serial_number = serial_number;
845
846 mod_number = mod_values[0];
847 if (num_mod == 2) {
848 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
849 /* Locate the maverick component */
850 if (mod_count[1] > mod_count[0]) {
851 mod_number = mod_values[1];
852 } else if (mod_count[1] < mod_count[0]) {
853 mod_number = mod_values[0];
854 } else {
855 /* counts of different modification values
856 are the same. Assume greater value is
857 the correct one, all other things
858 considered */
859 if (mod_values[0] > mod_values[1]) {
860 mod_number = mod_values[0];
861 } else {
862 mod_number = mod_values[1];
863 }
864
865 }
866 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
867 for (c = 0; c < raidPtr->numCol; c++) {
868 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
869 if (mod_number !=
870 ci_label->mod_counter) {
871 if ( ( hosed_row == r ) &&
872 ( hosed_column == c )) {
873 /* same one. Can
874 deal with it. */
875 } else {
876 hosed_row = r;
877 hosed_column = c;
878 if (num_ser != 1) {
879 too_fatal = 1;
880 break;
881 }
882 }
883 }
884 }
885 }
886 printf("Hosed component: %s\n",
887 &cfgPtr->devnames[hosed_row][hosed_column][0]);
888 if (!force) {
889 /* we'll fail this component, as if there are
890 other major errors, we arn't forcing things
891 and we'll abort the config anyways */
892 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
893 raidPtr->Disks[hosed_row][hosed_column].status
894 = rf_ds_failed;
895 raidPtr->numFailures++;
896 raidPtr->status[hosed_row] = rf_rs_degraded;
897 }
898 }
899 } else {
900 too_fatal = 1;
901 }
902 if (cfgPtr->parityConfig == '0') {
903 /* We've identified two different mod counters.
904 RAID 0 can't cope with that, so we'll punt */
905 too_fatal = 1;
906 }
907 }
908
909 raidPtr->mod_counter = mod_number;
910
911 if (too_fatal) {
912 /* we've had both a serial number mismatch, and a mod_counter
913 mismatch -- and they involved two different components!!
914 Bail -- make things fail so that the user must force
915 the issue... */
916 hosed_row = -1;
917 hosed_column = -1;
918 }
919
920 if (num_ser > 2) {
921 printf("raid%d: Too many different serial numbers!\n",
922 raidPtr->raidid);
923 }
924
925 if (num_mod > 2) {
926 printf("raid%d: Too many different mod counters!\n",
927 raidPtr->raidid);
928 }
929
930 /* we start by assuming the parity will be good, and flee from
931 that notion at the slightest sign of trouble */
932
933 parity_good = RF_RAID_CLEAN;
934 for (r = 0; r < raidPtr->numRow; r++) {
935 for (c = 0; c < raidPtr->numCol; c++) {
936 dev_name = &cfgPtr->devnames[r][c][0];
937 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
938
939 if ((r == hosed_row) && (c == hosed_column)) {
940 printf("raid%d: Ignoring %s\n",
941 raidPtr->raidid, dev_name);
942 } else {
943 rf_print_label_status( raidPtr, r, c,
944 dev_name, ci_label );
945 if (rf_check_label_vitals( raidPtr, r, c,
946 dev_name, ci_label,
947 serial_number,
948 mod_number )) {
949 fatal_error = 1;
950 }
951 if (ci_label->clean != RF_RAID_CLEAN) {
952 parity_good = RF_RAID_DIRTY;
953 }
954 }
955 }
956 }
957 if (fatal_error) {
958 parity_good = RF_RAID_DIRTY;
959 }
960
961 /* we note the state of the parity */
962 raidPtr->parity_good = parity_good;
963
964 return(fatal_error);
965 }
966
967 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
968 int
969 rf_add_hot_spare(raidPtr, sparePtr)
970 RF_Raid_t *raidPtr;
971 RF_SingleComponent_t *sparePtr;
972 {
973 RF_RaidDisk_t *disks;
974 RF_DiskQueue_t *spareQueues;
975 int ret;
976 unsigned int bs;
977 int spare_number;
978
979 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
980 printf("Num col: %d\n",raidPtr->numCol);
981 if (raidPtr->numSpare >= RF_MAXSPARE) {
982 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
983 return(EINVAL);
984 }
985
986 RF_LOCK_MUTEX(raidPtr->mutex);
987
988 /* the beginning of the spares... */
989 disks = &raidPtr->Disks[0][raidPtr->numCol];
990
991 spare_number = raidPtr->numSpare;
992
993 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
994 &disks[spare_number], 0,
995 raidPtr->numCol + spare_number);
996
997 if (ret)
998 goto fail;
999 if (disks[spare_number].status != rf_ds_optimal) {
1000 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1001 sparePtr->component_name);
1002 ret=EINVAL;
1003 goto fail;
1004 } else {
1005 disks[spare_number].status = rf_ds_spare;
1006 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1007 disks[spare_number].devname,
1008 (long int) disks[spare_number].numBlocks,
1009 disks[spare_number].blockSize,
1010 (long int) disks[spare_number].numBlocks *
1011 disks[spare_number].blockSize / 1024 / 1024);
1012 }
1013
1014
1015 /* check sizes and block sizes on the spare disk */
1016 bs = 1 << raidPtr->logBytesPerSector;
1017 if (disks[spare_number].blockSize != bs) {
1018 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1019 ret = EINVAL;
1020 goto fail;
1021 }
1022 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1023 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1024 disks[spare_number].devname,
1025 disks[spare_number].blockSize,
1026 (long int) raidPtr->sectorsPerDisk);
1027 ret = EINVAL;
1028 goto fail;
1029 } else {
1030 if (disks[spare_number].numBlocks >
1031 raidPtr->sectorsPerDisk) {
1032 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1033 (long int) raidPtr->sectorsPerDisk);
1034
1035 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1036 }
1037 }
1038
1039 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1040 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1041 0, raidPtr->numCol + spare_number,
1042 raidPtr->qType,
1043 raidPtr->sectorsPerDisk,
1044 raidPtr->Disks[0][raidPtr->numCol +
1045 spare_number].dev,
1046 raidPtr->maxOutstanding,
1047 &raidPtr->shutdownList,
1048 raidPtr->cleanupList);
1049
1050
1051 raidPtr->numSpare++;
1052 RF_UNLOCK_MUTEX(raidPtr->mutex);
1053 return (0);
1054
1055 fail:
1056 RF_UNLOCK_MUTEX(raidPtr->mutex);
1057 return(ret);
1058 }
1059
1060 int
1061 rf_remove_hot_spare(raidPtr,sparePtr)
1062 RF_Raid_t *raidPtr;
1063 RF_SingleComponent_t *sparePtr;
1064 {
1065 int spare_number;
1066
1067
1068 if (raidPtr->numSpare==0) {
1069 printf("No spares to remove!\n");
1070 return(EINVAL);
1071 }
1072
1073 spare_number = sparePtr->column;
1074
1075 return(EINVAL); /* XXX not implemented yet */
1076 #if 0
1077 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1078 return(EINVAL);
1079 }
1080
1081 /* verify that this spare isn't in use... */
1082
1083
1084
1085
1086 /* it's gone.. */
1087
1088 raidPtr->numSpare--;
1089
1090 return(0);
1091 #endif
1092 }
1093
1094
1095