rf_disks.c revision 1.26 1 /* $NetBSD: rf_disks.c,v 1.26 2000/03/27 03:25:17 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
89 RF_ComponentLabel_t *);
90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *, int, int );
92
/*
 * Debug printf helpers: forward the arguments to printf() only when
 * rf_diskDebug is non-zero.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement.  A bare `if (...) printf(...)' expansion would capture an
 * `else' that follows the macro invocation in the caller.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
95
96 /**************************************************************************
97 *
98 * initialize the disks comprising the array
99 *
100 * We want the spare disks to have regular row,col numbers so that we can
101 * easily substitue a spare for a failed disk. But, the driver code assumes
102 * throughout that the array contains numRow by numCol _non-spare_ disks, so
103 * it's not clear how to fit in the spares. This is an unfortunate holdover
104 * from raidSim. The quick and dirty fix is to make row zero bigger than the
105 * rest, and put all the spares in it. This probably needs to get changed
106 * eventually.
107 *
108 **************************************************************************/
109
110 int
111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
112 RF_ShutdownList_t **listp;
113 RF_Raid_t *raidPtr;
114 RF_Config_t *cfgPtr;
115 {
116 RF_RaidDisk_t **disks;
117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 RF_RowCol_t r, c;
119 int bs, ret;
120 unsigned i, count, foundone = 0, numFailuresThisRow;
121 int force;
122
123 force = cfgPtr->force;
124
125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
126 if (ret)
127 goto fail;
128
129 disks = raidPtr->Disks;
130
131 for (r = 0; r < raidPtr->numRow; r++) {
132 numFailuresThisRow = 0;
133 for (c = 0; c < raidPtr->numCol; c++) {
134 ret = rf_ConfigureDisk(raidPtr,
135 &cfgPtr->devnames[r][c][0],
136 &disks[r][c], r, c);
137
138 if (ret)
139 goto fail;
140
141 if (disks[r][c].status == rf_ds_optimal) {
142 raidread_component_label(
143 raidPtr->raid_cinfo[r][c].ci_dev,
144 raidPtr->raid_cinfo[r][c].ci_vp,
145 &raidPtr->raid_cinfo[r][c].ci_label);
146 }
147
148 if (disks[r][c].status != rf_ds_optimal) {
149 numFailuresThisRow++;
150 } else {
151 if (disks[r][c].numBlocks < min_numblks)
152 min_numblks = disks[r][c].numBlocks;
153 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
154 r, c, disks[r][c].devname,
155 (long int) disks[r][c].numBlocks,
156 disks[r][c].blockSize,
157 (long int) disks[r][c].numBlocks *
158 disks[r][c].blockSize / 1024 / 1024);
159 }
160 }
161 /* XXX fix for n-fault tolerant */
162 /* XXX this should probably check to see how many failures
163 we can handle for this configuration! */
164 if (numFailuresThisRow > 0)
165 raidPtr->status[r] = rf_rs_degraded;
166 }
167
168 /* all disks must be the same size & have the same block size, bs must
169 * be a power of 2 */
170 bs = 0;
171 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
172 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
173 if (disks[r][c].status == rf_ds_optimal) {
174 bs = disks[r][c].blockSize;
175 foundone = 1;
176 }
177 }
178 }
179 if (!foundone) {
180 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
181 ret = EINVAL;
182 goto fail;
183 }
184 for (count = 0, i = 1; i; i <<= 1)
185 if (bs & i)
186 count++;
187 if (count != 1) {
188 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
189 ret = EINVAL;
190 goto fail;
191 }
192
193 if (rf_CheckLabels( raidPtr, cfgPtr )) {
194 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
195 if (force != 0) {
196 printf("raid%d: Fatal errors being ignored.\n",
197 raidPtr->raidid);
198 } else {
199 ret = EINVAL;
200 goto fail;
201 }
202 }
203
204 for (r = 0; r < raidPtr->numRow; r++) {
205 for (c = 0; c < raidPtr->numCol; c++) {
206 if (disks[r][c].status == rf_ds_optimal) {
207 if (disks[r][c].blockSize != bs) {
208 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
209 ret = EINVAL;
210 goto fail;
211 }
212 if (disks[r][c].numBlocks != min_numblks) {
213 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
214 r, c, (int) min_numblks);
215 disks[r][c].numBlocks = min_numblks;
216 }
217 }
218 }
219 }
220
221 raidPtr->sectorsPerDisk = min_numblks;
222 raidPtr->logBytesPerSector = ffs(bs) - 1;
223 raidPtr->bytesPerSector = bs;
224 raidPtr->sectorMask = bs - 1;
225 return (0);
226
227 fail:
228
229 rf_UnconfigureVnodes( raidPtr );
230
231 return (ret);
232 }
233
234
235 /****************************************************************************
236 * set up the data structures describing the spare disks in the array
237 * recall from the above comment that the spare disk descriptors are stored
238 * in row zero, which is specially expanded to hold them.
239 ****************************************************************************/
240 int
241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
242 RF_ShutdownList_t ** listp;
243 RF_Raid_t * raidPtr;
244 RF_Config_t * cfgPtr;
245 {
246 int i, ret;
247 unsigned int bs;
248 RF_RaidDisk_t *disks;
249 int num_spares_done;
250
251 num_spares_done = 0;
252
253 /* The space for the spares should have already been allocated by
254 * ConfigureDisks() */
255
256 disks = &raidPtr->Disks[0][raidPtr->numCol];
257 for (i = 0; i < raidPtr->numSpare; i++) {
258 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
259 &disks[i], 0, raidPtr->numCol + i);
260 if (ret)
261 goto fail;
262 if (disks[i].status != rf_ds_optimal) {
263 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
264 &cfgPtr->spare_names[i][0]);
265 } else {
266 disks[i].status = rf_ds_spare; /* change status to
267 * spare */
268 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
269 disks[i].devname,
270 (long int) disks[i].numBlocks, disks[i].blockSize,
271 (long int) disks[i].numBlocks *
272 disks[i].blockSize / 1024 / 1024);
273 }
274 num_spares_done++;
275 }
276
277 /* check sizes and block sizes on spare disks */
278 bs = 1 << raidPtr->logBytesPerSector;
279 for (i = 0; i < raidPtr->numSpare; i++) {
280 if (disks[i].blockSize != bs) {
281 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
282 ret = EINVAL;
283 goto fail;
284 }
285 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
286 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
287 disks[i].devname, disks[i].blockSize,
288 (long int) raidPtr->sectorsPerDisk);
289 ret = EINVAL;
290 goto fail;
291 } else
292 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
293 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
294
295 disks[i].numBlocks = raidPtr->sectorsPerDisk;
296 }
297 }
298
299 return (0);
300
301 fail:
302
303 /* Release the hold on the main components. We've failed to allocate
304 * a spare, and since we're failing, we need to free things..
305
306 XXX failing to allocate a spare is *not* that big of a deal...
307 We *can* survive without it, if need be, esp. if we get hot
308 adding working.
309
310 If we don't fail out here, then we need a way to remove this spare...
311 that should be easier to do here than if we are "live"...
312
313 */
314
315 rf_UnconfigureVnodes( raidPtr );
316
317 return (ret);
318 }
319
320 static int
321 rf_AllocDiskStructures(raidPtr, cfgPtr)
322 RF_Raid_t *raidPtr;
323 RF_Config_t *cfgPtr;
324 {
325 RF_RaidDisk_t **disks;
326 int ret;
327 int r;
328
329 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
330 (RF_RaidDisk_t **), raidPtr->cleanupList);
331 if (disks == NULL) {
332 ret = ENOMEM;
333 goto fail;
334 }
335 raidPtr->Disks = disks;
336 /* get space for the device-specific stuff... */
337 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
338 sizeof(struct raidcinfo *), (struct raidcinfo **),
339 raidPtr->cleanupList);
340 if (raidPtr->raid_cinfo == NULL) {
341 ret = ENOMEM;
342 goto fail;
343 }
344
345 for (r = 0; r < raidPtr->numRow; r++) {
346 /* We allocate RF_MAXSPARE on the first row so that we
347 have room to do hot-swapping of spares */
348 RF_CallocAndAdd(disks[r], raidPtr->numCol
349 + ((r == 0) ? RF_MAXSPARE : 0),
350 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
351 raidPtr->cleanupList);
352 if (disks[r] == NULL) {
353 ret = ENOMEM;
354 goto fail;
355 }
356 /* get more space for device specific stuff.. */
357 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
358 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
359 sizeof(struct raidcinfo), (struct raidcinfo *),
360 raidPtr->cleanupList);
361 if (raidPtr->raid_cinfo[r] == NULL) {
362 ret = ENOMEM;
363 goto fail;
364 }
365 }
366 return(0);
367 fail:
368 rf_UnconfigureVnodes( raidPtr );
369
370 return(ret);
371 }
372
373
374 /* configure a single disk during auto-configuration at boot */
375 int
376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
377 RF_Raid_t *raidPtr;
378 RF_Config_t *cfgPtr;
379 RF_AutoConfig_t *auto_config;
380 {
381 RF_RaidDisk_t **disks;
382 RF_RaidDisk_t *diskPtr;
383 RF_RowCol_t r, c;
384 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
385 int bs, ret;
386 int numFailuresThisRow;
387 int force;
388 RF_AutoConfig_t *ac;
389 int parity_good;
390 int mod_counter;
391
392 #if DEBUG
393 printf("Starting autoconfiguration of RAID set...\n");
394 #endif
395 force = cfgPtr->force;
396
397 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
398 if (ret)
399 goto fail;
400
401 disks = raidPtr->Disks;
402
403 /* assume the parity will be fine.. */
404 parity_good = RF_RAID_CLEAN;
405
406 /* Check for mod_counters that are too low */
407 mod_counter = -1;
408 ac = auto_config;
409 while(ac!=NULL) {
410 if (ac->clabel->mod_counter > mod_counter) {
411 mod_counter = ac->clabel->mod_counter;
412 }
413 ac = ac->next;
414 }
415 if (mod_counter == -1) {
416 /* mod_counters were all negative!?!?!?
417 Ok, we can deal with that. */
418 #if 0
419 ac = auto_config;
420 while(ac!=NULL) {
421 if (ac->clabel->mod_counter > mod_counter) {
422 mod_counter = ac->clabel->mod_counter;
423 }
424 ac = ac->next;
425 }
426 #endif
427 }
428
429 for (r = 0; r < raidPtr->numRow; r++) {
430 numFailuresThisRow = 0;
431 for (c = 0; c < raidPtr->numCol; c++) {
432 diskPtr = &disks[r][c];
433
434 /* find this row/col in the autoconfig */
435 #if DEBUG
436 printf("Looking for %d,%d in autoconfig\n",r,c);
437 #endif
438 ac = auto_config;
439 while(ac!=NULL) {
440 if (ac->clabel==NULL) {
441 /* big-time bad news. */
442 goto fail;
443 }
444 if ((ac->clabel->row == r) &&
445 (ac->clabel->column == c)) {
446 /* it's this one... */
447 #if DEBUG
448 printf("Found: %s at %d,%d\n",
449 ac->devname,r,c);
450 #endif
451
452 break;
453 }
454 ac=ac->next;
455 }
456
457 if (ac!=NULL) {
458 /* Found it. Configure it.. */
459 diskPtr->blockSize = ac->clabel->blockSize;
460 diskPtr->numBlocks = ac->clabel->numBlocks;
461 /* Note: rf_protectedSectors is already
462 factored into numBlocks here */
463 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
464 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
465
466 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
467 ac->clabel, sizeof(*ac->clabel));
468 sprintf(diskPtr->devname, "/dev/%s",
469 ac->devname);
470
471 /* note the fact that this component was
472 autoconfigured. You'll need this info
473 later. Trust me :) */
474 diskPtr->auto_configured = 1;
475 diskPtr->dev = ac->dev;
476
477 /*
478 * we allow the user to specify that
479 * only a fraction of the disks should
480 * be used this is just for debug: it
481 * speeds up the parity scan
482 */
483
484 diskPtr->numBlocks = diskPtr->numBlocks *
485 rf_sizePercentage / 100;
486
487 /* XXX these will get set multiple times,
488 but since we're autoconfiguring, they'd
489 better be always the same each time!
490 If not, this is the least of your worries */
491
492 bs = diskPtr->blockSize;
493 min_numblks = diskPtr->numBlocks;
494
495 /* this gets done multiple times, but that's
496 fine -- the serial number will be the same
497 for all components, guaranteed */
498 raidPtr->serial_number =
499 ac->clabel->serial_number;
500 /* check the last time the label
501 was modified */
502 if (ac->clabel->mod_counter !=
503 mod_counter) {
504 /* Even though we've filled in all
505 of the above, we don't trust
506 this component since it's
507 modification counter is not
508 in sync with the rest, and we really
509 consider it to be failed. */
510 disks[r][c].status = rf_ds_failed;
511 numFailuresThisRow++;
512 } else {
513 if (ac->clabel->clean !=
514 RF_RAID_CLEAN) {
515 parity_good = RF_RAID_DIRTY;
516 }
517 }
518 } else {
519 /* Didn't find it at all!!
520 Component must really be dead */
521 disks[r][c].status = rf_ds_failed;
522 numFailuresThisRow++;
523 }
524 }
525 /* XXX fix for n-fault tolerant */
526 /* XXX this should probably check to see how many failures
527 we can handle for this configuration! */
528 if (numFailuresThisRow > 0)
529 raidPtr->status[r] = rf_rs_degraded;
530 }
531
532 raidPtr->mod_counter = mod_counter;
533
534 /* note the state of the parity, if any */
535 raidPtr->parity_good = parity_good;
536 raidPtr->sectorsPerDisk = min_numblks;
537 raidPtr->logBytesPerSector = ffs(bs) - 1;
538 raidPtr->bytesPerSector = bs;
539 raidPtr->sectorMask = bs - 1;
540 return (0);
541
542 fail:
543
544 rf_UnconfigureVnodes( raidPtr );
545
546 return (ret);
547
548 }
549
550 /* configure a single disk in the array */
551 int
552 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
553 RF_Raid_t *raidPtr;
554 char *buf;
555 RF_RaidDisk_t *diskPtr;
556 RF_RowCol_t row;
557 RF_RowCol_t col;
558 {
559 char *p;
560 int retcode;
561
562 struct partinfo dpart;
563 struct vnode *vp;
564 struct vattr va;
565 struct proc *proc;
566 int error;
567
568 retcode = 0;
569 p = rf_find_non_white(buf);
570 if (p[strlen(p) - 1] == '\n') {
571 /* strip off the newline */
572 p[strlen(p) - 1] = '\0';
573 }
574 (void) strcpy(diskPtr->devname, p);
575
576 proc = raidPtr->engine_thread;
577
578 /* Let's start by claiming the component is fine and well... */
579 diskPtr->status = rf_ds_optimal;
580
581 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
582 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
583
584 error = raidlookup(diskPtr->devname, proc, &vp);
585 if (error) {
586 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
587 if (error == ENXIO) {
588 /* the component isn't there... must be dead :-( */
589 diskPtr->status = rf_ds_failed;
590 } else {
591 return (error);
592 }
593 }
594 if (diskPtr->status == rf_ds_optimal) {
595
596 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
597 return (error);
598 }
599 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
600 FREAD, proc->p_ucred, proc);
601 if (error) {
602 return (error);
603 }
604
605 diskPtr->blockSize = dpart.disklab->d_secsize;
606
607 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
608 diskPtr->partitionSize = dpart.part->p_size;
609
610 raidPtr->raid_cinfo[row][col].ci_vp = vp;
611 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
612
613 /* This component was not automatically configured */
614 diskPtr->auto_configured = 0;
615 diskPtr->dev = va.va_rdev;
616
617 /* we allow the user to specify that only a fraction of the
618 * disks should be used this is just for debug: it speeds up
619 * the parity scan */
620 diskPtr->numBlocks = diskPtr->numBlocks *
621 rf_sizePercentage / 100;
622 }
623 return (0);
624 }
625
626 static void
627 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
628 RF_Raid_t *raidPtr;
629 int row;
630 int column;
631 char *dev_name;
632 RF_ComponentLabel_t *ci_label;
633 {
634
635 printf("raid%d: Component %s being configured at row: %d col: %d\n",
636 raidPtr->raidid, dev_name, row, column );
637 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
638 ci_label->row, ci_label->column,
639 ci_label->num_rows, ci_label->num_columns);
640 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
641 ci_label->version, ci_label->serial_number,
642 ci_label->mod_counter);
643 printf(" Clean: %s Status: %d\n",
644 ci_label->clean ? "Yes" : "No", ci_label->status );
645 }
646
647 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
648 serial_number, mod_counter )
649 RF_Raid_t *raidPtr;
650 int row;
651 int column;
652 char *dev_name;
653 RF_ComponentLabel_t *ci_label;
654 int serial_number;
655 int mod_counter;
656 {
657 int fatal_error = 0;
658
659 if (serial_number != ci_label->serial_number) {
660 printf("%s has a different serial number: %d %d\n",
661 dev_name, serial_number, ci_label->serial_number);
662 fatal_error = 1;
663 }
664 if (mod_counter != ci_label->mod_counter) {
665 printf("%s has a different modfication count: %d %d\n",
666 dev_name, mod_counter, ci_label->mod_counter);
667 }
668
669 if (row != ci_label->row) {
670 printf("Row out of alignment for: %s\n", dev_name);
671 fatal_error = 1;
672 }
673 if (column != ci_label->column) {
674 printf("Column out of alignment for: %s\n", dev_name);
675 fatal_error = 1;
676 }
677 if (raidPtr->numRow != ci_label->num_rows) {
678 printf("Number of rows do not match for: %s\n", dev_name);
679 fatal_error = 1;
680 }
681 if (raidPtr->numCol != ci_label->num_columns) {
682 printf("Number of columns do not match for: %s\n", dev_name);
683 fatal_error = 1;
684 }
685 if (ci_label->clean == 0) {
686 /* it's not clean, but that's not fatal */
687 printf("%s is not clean!\n", dev_name);
688 }
689 return(fatal_error);
690 }
691
692
693 /*
694
695 rf_CheckLabels() - check all the component labels for consistency.
696 Return an error if there is anything major amiss.
697
698 */
699
700 int
701 rf_CheckLabels( raidPtr, cfgPtr )
702 RF_Raid_t *raidPtr;
703 RF_Config_t *cfgPtr;
704 {
705 int r,c;
706 char *dev_name;
707 RF_ComponentLabel_t *ci_label;
708 int serial_number = 0;
709 int mod_number = 0;
710 int fatal_error = 0;
711 int mod_values[4];
712 int mod_count[4];
713 int ser_values[4];
714 int ser_count[4];
715 int num_ser;
716 int num_mod;
717 int i;
718 int found;
719 int hosed_row;
720 int hosed_column;
721 int too_fatal;
722 int parity_good;
723 int force;
724
725 hosed_row = -1;
726 hosed_column = -1;
727 too_fatal = 0;
728 force = cfgPtr->force;
729
730 /*
731 We're going to try to be a little intelligent here. If one
732 component's label is bogus, and we can identify that it's the
733 *only* one that's gone, we'll mark it as "failed" and allow
734 the configuration to proceed. This will be the *only* case
735 that we'll proceed if there would be (otherwise) fatal errors.
736
737 Basically we simply keep a count of how many components had
738 what serial number. If all but one agree, we simply mark
739 the disagreeing component as being failed, and allow
740 things to come up "normally".
741
742 We do this first for serial numbers, and then for "mod_counter".
743
744 */
745
746 num_ser = 0;
747 num_mod = 0;
748 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
749 for (c = 0; c < raidPtr->numCol; c++) {
750 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
751 found=0;
752 for(i=0;i<num_ser;i++) {
753 if (ser_values[i] == ci_label->serial_number) {
754 ser_count[i]++;
755 found=1;
756 break;
757 }
758 }
759 if (!found) {
760 ser_values[num_ser] = ci_label->serial_number;
761 ser_count[num_ser] = 1;
762 num_ser++;
763 if (num_ser>2) {
764 fatal_error = 1;
765 break;
766 }
767 }
768 found=0;
769 for(i=0;i<num_mod;i++) {
770 if (mod_values[i] == ci_label->mod_counter) {
771 mod_count[i]++;
772 found=1;
773 break;
774 }
775 }
776 if (!found) {
777 mod_values[num_mod] = ci_label->mod_counter;
778 mod_count[num_mod] = 1;
779 num_mod++;
780 if (num_mod>2) {
781 fatal_error = 1;
782 break;
783 }
784 }
785 }
786 }
787 #if DEBUG
788 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
789 for(i=0;i<num_ser;i++) {
790 printf("%d %d\n", ser_values[i], ser_count[i]);
791 }
792 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
793 for(i=0;i<num_mod;i++) {
794 printf("%d %d\n", mod_values[i], mod_count[i]);
795 }
796 #endif
797 serial_number = ser_values[0];
798 if (num_ser == 2) {
799 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
800 /* Locate the maverick component */
801 if (ser_count[1] > ser_count[0]) {
802 serial_number = ser_values[1];
803 }
804 for (r = 0; r < raidPtr->numRow; r++) {
805 for (c = 0; c < raidPtr->numCol; c++) {
806 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
807 if (serial_number !=
808 ci_label->serial_number) {
809 hosed_row = r;
810 hosed_column = c;
811 break;
812 }
813 }
814 }
815 printf("Hosed component: %s\n",
816 &cfgPtr->devnames[hosed_row][hosed_column][0]);
817 if (!force) {
818 /* we'll fail this component, as if there are
819 other major errors, we arn't forcing things
820 and we'll abort the config anyways */
821 raidPtr->Disks[hosed_row][hosed_column].status
822 = rf_ds_failed;
823 raidPtr->numFailures++;
824 raidPtr->status[hosed_row] = rf_rs_degraded;
825 }
826 } else {
827 too_fatal = 1;
828 }
829 if (cfgPtr->parityConfig == '0') {
830 /* We've identified two different serial numbers.
831 RAID 0 can't cope with that, so we'll punt */
832 too_fatal = 1;
833 }
834
835 }
836
837 /* record the serial number for later. If we bail later, setting
838 this doesn't matter, otherwise we've got the best guess at the
839 correct serial number */
840 raidPtr->serial_number = serial_number;
841
842 mod_number = mod_values[0];
843 if (num_mod == 2) {
844 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
845 /* Locate the maverick component */
846 if (mod_count[1] > mod_count[0]) {
847 mod_number = mod_values[1];
848 } else if (mod_count[1] < mod_count[0]) {
849 mod_number = mod_values[0];
850 } else {
851 /* counts of different modification values
852 are the same. Assume greater value is
853 the correct one, all other things
854 considered */
855 if (mod_values[0] > mod_values[1]) {
856 mod_number = mod_values[0];
857 } else {
858 mod_number = mod_values[1];
859 }
860
861 }
862 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
863 for (c = 0; c < raidPtr->numCol; c++) {
864 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
865 if (mod_number !=
866 ci_label->mod_counter) {
867 if ( ( hosed_row == r ) &&
868 ( hosed_column == c )) {
869 /* same one. Can
870 deal with it. */
871 } else {
872 hosed_row = r;
873 hosed_column = c;
874 if (num_ser != 1) {
875 too_fatal = 1;
876 break;
877 }
878 }
879 }
880 }
881 }
882 printf("Hosed component: %s\n",
883 &cfgPtr->devnames[hosed_row][hosed_column][0]);
884 if (!force) {
885 /* we'll fail this component, as if there are
886 other major errors, we arn't forcing things
887 and we'll abort the config anyways */
888 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
889 raidPtr->Disks[hosed_row][hosed_column].status
890 = rf_ds_failed;
891 raidPtr->numFailures++;
892 raidPtr->status[hosed_row] = rf_rs_degraded;
893 }
894 }
895 } else {
896 too_fatal = 1;
897 }
898 if (cfgPtr->parityConfig == '0') {
899 /* We've identified two different mod counters.
900 RAID 0 can't cope with that, so we'll punt */
901 too_fatal = 1;
902 }
903 }
904
905 raidPtr->mod_counter = mod_number;
906
907 if (too_fatal) {
908 /* we've had both a serial number mismatch, and a mod_counter
909 mismatch -- and they involved two different components!!
910 Bail -- make things fail so that the user must force
911 the issue... */
912 hosed_row = -1;
913 hosed_column = -1;
914 }
915
916 if (num_ser > 2) {
917 printf("raid%d: Too many different serial numbers!\n",
918 raidPtr->raidid);
919 }
920
921 if (num_mod > 2) {
922 printf("raid%d: Too many different mod counters!\n",
923 raidPtr->raidid);
924 }
925
926 /* we start by assuming the parity will be good, and flee from
927 that notion at the slightest sign of trouble */
928
929 parity_good = RF_RAID_CLEAN;
930 for (r = 0; r < raidPtr->numRow; r++) {
931 for (c = 0; c < raidPtr->numCol; c++) {
932 dev_name = &cfgPtr->devnames[r][c][0];
933 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
934
935 if ((r == hosed_row) && (c == hosed_column)) {
936 printf("raid%d: Ignoring %s\n",
937 raidPtr->raidid, dev_name);
938 } else {
939 rf_print_label_status( raidPtr, r, c,
940 dev_name, ci_label );
941 if (rf_check_label_vitals( raidPtr, r, c,
942 dev_name, ci_label,
943 serial_number,
944 mod_number )) {
945 fatal_error = 1;
946 }
947 if (ci_label->clean != RF_RAID_CLEAN) {
948 parity_good = RF_RAID_DIRTY;
949 }
950 }
951 }
952 }
953 if (fatal_error) {
954 parity_good = RF_RAID_DIRTY;
955 }
956
957 /* we note the state of the parity */
958 raidPtr->parity_good = parity_good;
959
960 return(fatal_error);
961 }
962
963 int
964 rf_add_hot_spare(raidPtr, sparePtr)
965 RF_Raid_t *raidPtr;
966 RF_SingleComponent_t *sparePtr;
967 {
968 RF_RaidDisk_t *disks;
969 RF_DiskQueue_t *spareQueues;
970 int ret;
971 unsigned int bs;
972 int spare_number;
973
974 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
975 printf("Num col: %d\n",raidPtr->numCol);
976 if (raidPtr->numSpare >= RF_MAXSPARE) {
977 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
978 return(EINVAL);
979 }
980
981 RF_LOCK_MUTEX(raidPtr->mutex);
982
983 /* the beginning of the spares... */
984 disks = &raidPtr->Disks[0][raidPtr->numCol];
985
986 spare_number = raidPtr->numSpare;
987
988 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
989 &disks[spare_number], 0,
990 raidPtr->numCol + spare_number);
991
992 if (ret)
993 goto fail;
994 if (disks[spare_number].status != rf_ds_optimal) {
995 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
996 sparePtr->component_name);
997 ret=EINVAL;
998 goto fail;
999 } else {
1000 disks[spare_number].status = rf_ds_spare;
1001 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1002 disks[spare_number].devname,
1003 (long int) disks[spare_number].numBlocks,
1004 disks[spare_number].blockSize,
1005 (long int) disks[spare_number].numBlocks *
1006 disks[spare_number].blockSize / 1024 / 1024);
1007 }
1008
1009
1010 /* check sizes and block sizes on the spare disk */
1011 bs = 1 << raidPtr->logBytesPerSector;
1012 if (disks[spare_number].blockSize != bs) {
1013 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1014 ret = EINVAL;
1015 goto fail;
1016 }
1017 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1018 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1019 disks[spare_number].devname,
1020 disks[spare_number].blockSize,
1021 (long int) raidPtr->sectorsPerDisk);
1022 ret = EINVAL;
1023 goto fail;
1024 } else {
1025 if (disks[spare_number].numBlocks >
1026 raidPtr->sectorsPerDisk) {
1027 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1028 (long int) raidPtr->sectorsPerDisk);
1029
1030 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1031 }
1032 }
1033
1034 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1035 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1036 0, raidPtr->numCol + spare_number,
1037 raidPtr->qType,
1038 raidPtr->sectorsPerDisk,
1039 raidPtr->Disks[0][raidPtr->numCol +
1040 spare_number].dev,
1041 raidPtr->maxOutstanding,
1042 &raidPtr->shutdownList,
1043 raidPtr->cleanupList);
1044
1045
1046 raidPtr->numSpare++;
1047 RF_UNLOCK_MUTEX(raidPtr->mutex);
1048 return (0);
1049
1050 fail:
1051 RF_UNLOCK_MUTEX(raidPtr->mutex);
1052 return(ret);
1053 }
1054
1055 int
1056 rf_remove_hot_spare(raidPtr,sparePtr)
1057 RF_Raid_t *raidPtr;
1058 RF_SingleComponent_t *sparePtr;
1059 {
1060 int spare_number;
1061
1062
1063 if (raidPtr->numSpare==0) {
1064 printf("No spares to remove!\n");
1065 return(EINVAL);
1066 }
1067
1068 spare_number = sparePtr->column;
1069
1070 return(EINVAL); /* XXX not implemented yet */
1071 #if 0
1072 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1073 return(EINVAL);
1074 }
1075
1076 /* verify that this spare isn't in use... */
1077
1078
1079
1080
1081 /* it's gone.. */
1082
1083 raidPtr->numSpare--;
1084
1085 return(0);
1086 #endif
1087 }
1088
1089
1090 int
1091 rf_delete_component(raidPtr,component)
1092 RF_Raid_t *raidPtr;
1093 RF_SingleComponent_t *component;
1094 {
1095 RF_RaidDisk_t *disks;
1096
1097 if ((component->row < 0) ||
1098 (component->row >= raidPtr->numRow) ||
1099 (component->column < 0) ||
1100 (component->column >= raidPtr->numCol)) {
1101 return(EINVAL);
1102 }
1103
1104 disks = &raidPtr->Disks[component->row][component->column];
1105
1106 /* 1. This component must be marked as 'failed' */
1107
1108 return(EINVAL); /* Not implemented yet. */
1109 }
1110
1111 int
1112 rf_incorporate_hot_spare(raidPtr,component)
1113 RF_Raid_t *raidPtr;
1114 RF_SingleComponent_t *component;
1115 {
1116
1117 /* Issues here include how to 'move' this in if there is IO
1118 taking place (e.g. component queues and such) */
1119
1120 return(EINVAL); /* Not implemented yet. */
1121 }
1122