rf_disks.c revision 1.16 1 /* $NetBSD: rf_disks.c,v 1.16 2000/02/23 02:01:55 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91
/*
 * Debug tracing helpers: forward their arguments to printf() only when
 * rf_diskDebug is non-zero.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement.  A bare "if (rf_diskDebug) printf(...)" would capture the
 * `else' of an enclosing unbraced if/else at the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94
95 /**************************************************************************
96 *
97 * initialize the disks comprising the array
98 *
99 * We want the spare disks to have regular row,col numbers so that we can
100 * easily substitue a spare for a failed disk. But, the driver code assumes
101 * throughout that the array contains numRow by numCol _non-spare_ disks, so
102 * it's not clear how to fit in the spares. This is an unfortunate holdover
103 * from raidSim. The quick and dirty fix is to make row zero bigger than the
104 * rest, and put all the spares in it. This probably needs to get changed
105 * eventually.
106 *
107 **************************************************************************/
108
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 RF_ShutdownList_t **listp;
112 RF_Raid_t *raidPtr;
113 RF_Config_t *cfgPtr;
114 {
115 RF_RaidDisk_t **disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t r, c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
125 (RF_RaidDisk_t **), raidPtr->cleanupList);
126 if (disks == NULL) {
127 ret = ENOMEM;
128 goto fail;
129 }
130 raidPtr->Disks = disks;
131
132 /* get space for the device-specific stuff... */
133 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
134 sizeof(struct raidcinfo *), (struct raidcinfo **),
135 raidPtr->cleanupList);
136 if (raidPtr->raid_cinfo == NULL) {
137 ret = ENOMEM;
138 goto fail;
139 }
140 for (r = 0; r < raidPtr->numRow; r++) {
141 numFailuresThisRow = 0;
142 /* We allocate RF_MAXSPARE on the first row so that we
143 have room to do hot-swapping of spares */
144 RF_CallocAndAdd(disks[r], raidPtr->numCol
145 + ((r == 0) ? RF_MAXSPARE : 0),
146 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
147 raidPtr->cleanupList);
148 if (disks[r] == NULL) {
149 ret = ENOMEM;
150 goto fail;
151 }
152 /* get more space for device specific stuff.. */
153 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
154 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
155 sizeof(struct raidcinfo), (struct raidcinfo *),
156 raidPtr->cleanupList);
157 if (raidPtr->raid_cinfo[r] == NULL) {
158 ret = ENOMEM;
159 goto fail;
160 }
161 for (c = 0; c < raidPtr->numCol; c++) {
162 ret = rf_ConfigureDisk(raidPtr,
163 &cfgPtr->devnames[r][c][0],
164 &disks[r][c], r, c);
165
166 if (ret)
167 goto fail;
168
169 if (disks[r][c].status == rf_ds_optimal) {
170 raidread_component_label(
171 raidPtr->raid_cinfo[r][c].ci_dev,
172 raidPtr->raid_cinfo[r][c].ci_vp,
173 &raidPtr->raid_cinfo[r][c].ci_label);
174 }
175
176 if (disks[r][c].status != rf_ds_optimal) {
177 numFailuresThisRow++;
178 } else {
179 if (disks[r][c].numBlocks < min_numblks)
180 min_numblks = disks[r][c].numBlocks;
181 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
182 r, c, disks[r][c].devname,
183 (long int) disks[r][c].numBlocks,
184 disks[r][c].blockSize,
185 (long int) disks[r][c].numBlocks *
186 disks[r][c].blockSize / 1024 / 1024);
187 }
188 }
189 /* XXX fix for n-fault tolerant */
190 /* XXX this should probably check to see how many failures
191 we can handle for this configuration! */
192 if (numFailuresThisRow > 0)
193 raidPtr->status[r] = rf_rs_degraded;
194 }
195
196 /* all disks must be the same size & have the same block size, bs must
197 * be a power of 2 */
198 bs = 0;
199 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
200 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
201 if (disks[r][c].status == rf_ds_optimal) {
202 bs = disks[r][c].blockSize;
203 foundone = 1;
204 }
205 }
206 }
207 if (!foundone) {
208 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
209 ret = EINVAL;
210 goto fail;
211 }
212 for (count = 0, i = 1; i; i <<= 1)
213 if (bs & i)
214 count++;
215 if (count != 1) {
216 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
217 ret = EINVAL;
218 goto fail;
219 }
220
221 if (rf_CheckLabels( raidPtr, cfgPtr )) {
222 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
223 if (force != 0) {
224 printf("raid%d: Fatal errors being ignored.\n",
225 raidPtr->raidid);
226 } else {
227 ret = EINVAL;
228 goto fail;
229 }
230 }
231
232 for (r = 0; r < raidPtr->numRow; r++) {
233 for (c = 0; c < raidPtr->numCol; c++) {
234 if (disks[r][c].status == rf_ds_optimal) {
235 if (disks[r][c].blockSize != bs) {
236 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
237 ret = EINVAL;
238 goto fail;
239 }
240 if (disks[r][c].numBlocks != min_numblks) {
241 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
242 r, c, (int) min_numblks);
243 disks[r][c].numBlocks = min_numblks;
244 }
245 }
246 }
247 }
248
249 raidPtr->sectorsPerDisk = min_numblks;
250 raidPtr->logBytesPerSector = ffs(bs) - 1;
251 raidPtr->bytesPerSector = bs;
252 raidPtr->sectorMask = bs - 1;
253 return (0);
254
255 fail:
256
257 rf_UnconfigureVnodes( raidPtr );
258
259 return (ret);
260 }
261
262
263 /****************************************************************************
264 * set up the data structures describing the spare disks in the array
265 * recall from the above comment that the spare disk descriptors are stored
266 * in row zero, which is specially expanded to hold them.
267 ****************************************************************************/
268 int
269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
270 RF_ShutdownList_t ** listp;
271 RF_Raid_t * raidPtr;
272 RF_Config_t * cfgPtr;
273 {
274 int i, ret;
275 unsigned int bs;
276 RF_RaidDisk_t *disks;
277 int num_spares_done;
278
279 num_spares_done = 0;
280
281 /* The space for the spares should have already been allocated by
282 * ConfigureDisks() */
283
284 disks = &raidPtr->Disks[0][raidPtr->numCol];
285 for (i = 0; i < raidPtr->numSpare; i++) {
286 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
287 &disks[i], 0, raidPtr->numCol + i);
288 if (ret)
289 goto fail;
290 if (disks[i].status != rf_ds_optimal) {
291 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
292 &cfgPtr->spare_names[i][0]);
293 } else {
294 disks[i].status = rf_ds_spare; /* change status to
295 * spare */
296 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
297 disks[i].devname,
298 (long int) disks[i].numBlocks, disks[i].blockSize,
299 (long int) disks[i].numBlocks *
300 disks[i].blockSize / 1024 / 1024);
301 }
302 num_spares_done++;
303 }
304
305 /* check sizes and block sizes on spare disks */
306 bs = 1 << raidPtr->logBytesPerSector;
307 for (i = 0; i < raidPtr->numSpare; i++) {
308 if (disks[i].blockSize != bs) {
309 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
310 ret = EINVAL;
311 goto fail;
312 }
313 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
314 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
315 disks[i].devname, disks[i].blockSize,
316 (long int) raidPtr->sectorsPerDisk);
317 ret = EINVAL;
318 goto fail;
319 } else
320 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
321 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
322
323 disks[i].numBlocks = raidPtr->sectorsPerDisk;
324 }
325 }
326
327 return (0);
328
329 fail:
330
331 /* Release the hold on the main components. We've failed to allocate
332 * a spare, and since we're failing, we need to free things..
333
334 XXX failing to allocate a spare is *not* that big of a deal...
335 We *can* survive without it, if need be, esp. if we get hot
336 adding working.
337
338 If we don't fail out here, then we need a way to remove this spare...
339 that should be easier to do here than if we are "live"...
340
341 */
342
343 rf_UnconfigureVnodes( raidPtr );
344
345 return (ret);
346 }
347
348 static int
349 rf_AllocDiskStructures(raidPtr, cfgPtr)
350 RF_Raid_t *raidPtr;
351 RF_Config_t *cfgPtr;
352 {
353 RF_RaidDisk_t **disks;
354 int ret;
355 int r;
356
357 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
358 (RF_RaidDisk_t **), raidPtr->cleanupList);
359 if (disks == NULL) {
360 ret = ENOMEM;
361 goto fail;
362 }
363 raidPtr->Disks = disks;
364 /* get space for the device-specific stuff... */
365 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
366 sizeof(struct raidcinfo *), (struct raidcinfo **),
367 raidPtr->cleanupList);
368 if (raidPtr->raid_cinfo == NULL) {
369 ret = ENOMEM;
370 goto fail;
371 }
372
373 for (r = 0; r < raidPtr->numRow; r++) {
374 /* We allocate RF_MAXSPARE on the first row so that we
375 have room to do hot-swapping of spares */
376 RF_CallocAndAdd(disks[r], raidPtr->numCol
377 + ((r == 0) ? RF_MAXSPARE : 0),
378 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
379 raidPtr->cleanupList);
380 if (disks[r] == NULL) {
381 ret = ENOMEM;
382 goto fail;
383 }
384 /* get more space for device specific stuff.. */
385 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
386 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
387 sizeof(struct raidcinfo), (struct raidcinfo *),
388 raidPtr->cleanupList);
389 if (raidPtr->raid_cinfo[r] == NULL) {
390 ret = ENOMEM;
391 goto fail;
392 }
393 }
394 return(0);
395 fail:
396 rf_UnconfigureVnodes( raidPtr );
397
398 return(ret);
399 }
400
401
402 /* configure a single disk during auto-configuration at boot */
403 int
404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
405 RF_Raid_t *raidPtr;
406 RF_Config_t *cfgPtr;
407 RF_AutoConfig_t *auto_config;
408 {
409 RF_RaidDisk_t **disks;
410 RF_RaidDisk_t *diskPtr;
411 RF_RowCol_t r, c;
412 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
413 int bs, ret;
414 int numFailuresThisRow;
415 int force;
416 RF_AutoConfig_t *ac;
417
418 #if DEBUG
419 printf("Starting autoconfiguration of RAID set...\n");
420 #endif
421 force = cfgPtr->force;
422
423 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
424 if (ret)
425 goto fail;
426
427 disks = raidPtr->Disks;
428
429 for (r = 0; r < raidPtr->numRow; r++) {
430 numFailuresThisRow = 0;
431 for (c = 0; c < raidPtr->numCol; c++) {
432 diskPtr = &disks[r][c];
433
434 /* find this row/col in the autoconfig */
435 #if DEBUG
436 printf("Looking for %d,%d in autoconfig\n",r,c);
437 #endif
438 ac = auto_config;
439 while(ac!=NULL) {
440 if (ac->clabel==NULL) {
441 /* big-time bad news. */
442 goto fail;
443 }
444 if ((ac->clabel->row == r) &&
445 (ac->clabel->column == c)) {
446 /* it's this one... */
447 #if DEBUG
448 printf("Found: %s at %d,%d\n",
449 ac->devname,r,c);
450 #endif
451 break;
452 }
453 ac=ac->next;
454 }
455
456 if (ac!=NULL) {
457 /* Found it. Configure it.. */
458 diskPtr->blockSize = ac->clabel->blockSize;
459 diskPtr->numBlocks = ac->clabel->numBlocks;
460 /* Note: rf_protectedSectors is already
461 factored into numBlocks here */
462 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
463 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
464
465 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
466 ac->clabel, sizeof(*ac->clabel));
467 sprintf(diskPtr->devname, "/dev/%s",
468 ac->devname);
469
470 /* note the fact that this component was
471 autoconfigured. You'll need this info
472 later. Trust me :) */
473 diskPtr->auto_configured = 1;
474 diskPtr->dev = ac->dev;
475
476 /*
477 * we allow the user to specify that
478 * only a fraction of the disks should
479 * be used this is just for debug: it
480 * speeds up the parity scan
481 */
482
483 diskPtr->numBlocks = diskPtr->numBlocks *
484 rf_sizePercentage / 100;
485
486 /* XXX these will get set multiple times,
487 but since we're autoconfiguring, they'd
488 better be always the same each time!
489 If not, this is the least of your worries */
490
491 bs = diskPtr->blockSize;
492 min_numblks = diskPtr->numBlocks;
493 } else {
494 /* Didn't find it!! Component must be dead */
495 disks[r][c].status = rf_ds_failed;
496 numFailuresThisRow++;
497 }
498 }
499 /* XXX fix for n-fault tolerant */
500 /* XXX this should probably check to see how many failures
501 we can handle for this configuration! */
502 if (numFailuresThisRow > 0)
503 raidPtr->status[r] = rf_rs_degraded;
504 }
505
506 raidPtr->sectorsPerDisk = min_numblks;
507 raidPtr->logBytesPerSector = ffs(bs) - 1;
508 raidPtr->bytesPerSector = bs;
509 raidPtr->sectorMask = bs - 1;
510 return (0);
511
512 fail:
513
514 rf_UnconfigureVnodes( raidPtr );
515
516 return (ret);
517
518 }
519
520 /* configure a single disk in the array */
521 int
522 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
523 RF_Raid_t *raidPtr;
524 char *buf;
525 RF_RaidDisk_t *diskPtr;
526 RF_RowCol_t row;
527 RF_RowCol_t col;
528 {
529 char *p;
530 int retcode;
531
532 struct partinfo dpart;
533 struct vnode *vp;
534 struct vattr va;
535 struct proc *proc;
536 int error;
537
538 retcode = 0;
539 p = rf_find_non_white(buf);
540 if (p[strlen(p) - 1] == '\n') {
541 /* strip off the newline */
542 p[strlen(p) - 1] = '\0';
543 }
544 (void) strcpy(diskPtr->devname, p);
545
546 proc = raidPtr->engine_thread;
547
548 /* Let's start by claiming the component is fine and well... */
549 diskPtr->status = rf_ds_optimal;
550
551 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
552 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
553
554 error = raidlookup(diskPtr->devname, proc, &vp);
555 if (error) {
556 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
557 if (error == ENXIO) {
558 /* the component isn't there... must be dead :-( */
559 diskPtr->status = rf_ds_failed;
560 } else {
561 return (error);
562 }
563 }
564 if (diskPtr->status == rf_ds_optimal) {
565
566 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
567 return (error);
568 }
569 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
570 FREAD, proc->p_ucred, proc);
571 if (error) {
572 return (error);
573 }
574
575 diskPtr->blockSize = dpart.disklab->d_secsize;
576
577 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
578 diskPtr->partitionSize = dpart.part->p_size;
579
580 raidPtr->raid_cinfo[row][col].ci_vp = vp;
581 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
582
583 /* This component was not automatically configured */
584 diskPtr->auto_configured = 0;
585 diskPtr->dev = va.va_rdev;
586
587 /* we allow the user to specify that only a fraction of the
588 * disks should be used this is just for debug: it speeds up
589 * the parity scan */
590 diskPtr->numBlocks = diskPtr->numBlocks *
591 rf_sizePercentage / 100;
592 }
593 return (0);
594 }
595
596 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
597 RF_ComponentLabel_t *);
598
599 static void
600 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
601 RF_Raid_t *raidPtr;
602 int row;
603 int column;
604 char *dev_name;
605 RF_ComponentLabel_t *ci_label;
606 {
607
608 printf("raid%d: Component %s being configured at row: %d col: %d\n",
609 raidPtr->raidid, dev_name, row, column );
610 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
611 ci_label->row, ci_label->column,
612 ci_label->num_rows, ci_label->num_columns);
613 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
614 ci_label->version, ci_label->serial_number,
615 ci_label->mod_counter);
616 printf(" Clean: %s Status: %d\n",
617 ci_label->clean ? "Yes" : "No", ci_label->status );
618 }
619
620 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
621 RF_ComponentLabel_t *, int, int );
622 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
623 serial_number, mod_counter )
624 RF_Raid_t *raidPtr;
625 int row;
626 int column;
627 char *dev_name;
628 RF_ComponentLabel_t *ci_label;
629 int serial_number;
630 int mod_counter;
631 {
632 int fatal_error = 0;
633
634 if (serial_number != ci_label->serial_number) {
635 printf("%s has a different serial number: %d %d\n",
636 dev_name, serial_number, ci_label->serial_number);
637 fatal_error = 1;
638 }
639 if (mod_counter != ci_label->mod_counter) {
640 printf("%s has a different modfication count: %d %d\n",
641 dev_name, mod_counter, ci_label->mod_counter);
642 }
643
644 if (row != ci_label->row) {
645 printf("Row out of alignment for: %s\n", dev_name);
646 fatal_error = 1;
647 }
648 if (column != ci_label->column) {
649 printf("Column out of alignment for: %s\n", dev_name);
650 fatal_error = 1;
651 }
652 if (raidPtr->numRow != ci_label->num_rows) {
653 printf("Number of rows do not match for: %s\n", dev_name);
654 fatal_error = 1;
655 }
656 if (raidPtr->numCol != ci_label->num_columns) {
657 printf("Number of columns do not match for: %s\n", dev_name);
658 fatal_error = 1;
659 }
660 if (ci_label->clean == 0) {
661 /* it's not clean, but that's not fatal */
662 printf("%s is not clean!\n", dev_name);
663 }
664 return(fatal_error);
665 }
666
667
668 /*
669
670 rf_CheckLabels() - check all the component labels for consistency.
671 Return an error if there is anything major amiss.
672
673 */
674
675 int
676 rf_CheckLabels( raidPtr, cfgPtr )
677 RF_Raid_t *raidPtr;
678 RF_Config_t *cfgPtr;
679 {
680 int r,c;
681 char *dev_name;
682 RF_ComponentLabel_t *ci_label;
683 int serial_number = 0;
684 int mod_number = 0;
685 int fatal_error = 0;
686 int mod_values[4];
687 int mod_count[4];
688 int ser_values[4];
689 int ser_count[4];
690 int num_ser;
691 int num_mod;
692 int i;
693 int found;
694 int hosed_row;
695 int hosed_column;
696 int too_fatal;
697 int parity_good;
698 int force;
699
700 hosed_row = -1;
701 hosed_column = -1;
702 too_fatal = 0;
703 force = cfgPtr->force;
704
705 /*
706 We're going to try to be a little intelligent here. If one
707 component's label is bogus, and we can identify that it's the
708 *only* one that's gone, we'll mark it as "failed" and allow
709 the configuration to proceed. This will be the *only* case
710 that we'll proceed if there would be (otherwise) fatal errors.
711
712 Basically we simply keep a count of how many components had
713 what serial number. If all but one agree, we simply mark
714 the disagreeing component as being failed, and allow
715 things to come up "normally".
716
717 We do this first for serial numbers, and then for "mod_counter".
718
719 */
720
721 num_ser = 0;
722 num_mod = 0;
723 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
724 for (c = 0; c < raidPtr->numCol; c++) {
725 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
726 found=0;
727 for(i=0;i<num_ser;i++) {
728 if (ser_values[i] == ci_label->serial_number) {
729 ser_count[i]++;
730 found=1;
731 break;
732 }
733 }
734 if (!found) {
735 ser_values[num_ser] = ci_label->serial_number;
736 ser_count[num_ser] = 1;
737 num_ser++;
738 if (num_ser>2) {
739 fatal_error = 1;
740 break;
741 }
742 }
743 found=0;
744 for(i=0;i<num_mod;i++) {
745 if (mod_values[i] == ci_label->mod_counter) {
746 mod_count[i]++;
747 found=1;
748 break;
749 }
750 }
751 if (!found) {
752 mod_values[num_mod] = ci_label->mod_counter;
753 mod_count[num_mod] = 1;
754 num_mod++;
755 if (num_mod>2) {
756 fatal_error = 1;
757 break;
758 }
759 }
760 }
761 }
762 #if DEBUG
763 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
764 for(i=0;i<num_ser;i++) {
765 printf("%d %d\n", ser_values[i], ser_count[i]);
766 }
767 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
768 for(i=0;i<num_mod;i++) {
769 printf("%d %d\n", mod_values[i], mod_count[i]);
770 }
771 #endif
772 serial_number = ser_values[0];
773 if (num_ser == 2) {
774 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
775 /* Locate the maverick component */
776 if (ser_count[1] > ser_count[0]) {
777 serial_number = ser_values[1];
778 }
779 for (r = 0; r < raidPtr->numRow; r++) {
780 for (c = 0; c < raidPtr->numCol; c++) {
781 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
782 if (serial_number !=
783 ci_label->serial_number) {
784 hosed_row = r;
785 hosed_column = c;
786 break;
787 }
788 }
789 }
790 printf("Hosed component: %s\n",
791 &cfgPtr->devnames[hosed_row][hosed_column][0]);
792 if (!force) {
793 /* we'll fail this component, as if there are
794 other major errors, we arn't forcing things
795 and we'll abort the config anyways */
796 raidPtr->Disks[hosed_row][hosed_column].status
797 = rf_ds_failed;
798 raidPtr->numFailures++;
799 raidPtr->status[hosed_row] = rf_rs_degraded;
800 }
801 } else {
802 too_fatal = 1;
803 }
804 if (cfgPtr->parityConfig == '0') {
805 /* We've identified two different serial numbers.
806 RAID 0 can't cope with that, so we'll punt */
807 too_fatal = 1;
808 }
809
810 }
811
812 /* record the serial number for later. If we bail later, setting
813 this doesn't matter, otherwise we've got the best guess at the
814 correct serial number */
815 raidPtr->serial_number = serial_number;
816
817 mod_number = mod_values[0];
818 if (num_mod == 2) {
819 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
820 /* Locate the maverick component */
821 if (mod_count[1] > mod_count[0]) {
822 mod_number = mod_values[1];
823 } else if (mod_count[1] < mod_count[0]) {
824 mod_number = mod_values[0];
825 } else {
826 /* counts of different modification values
827 are the same. Assume greater value is
828 the correct one, all other things
829 considered */
830 if (mod_values[0] > mod_values[1]) {
831 mod_number = mod_values[0];
832 } else {
833 mod_number = mod_values[1];
834 }
835
836 }
837 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
838 for (c = 0; c < raidPtr->numCol; c++) {
839 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
840 if (mod_number !=
841 ci_label->mod_counter) {
842 if ( ( hosed_row == r ) &&
843 ( hosed_column == c )) {
844 /* same one. Can
845 deal with it. */
846 } else {
847 hosed_row = r;
848 hosed_column = c;
849 if (num_ser != 1) {
850 too_fatal = 1;
851 break;
852 }
853 }
854 }
855 }
856 }
857 printf("Hosed component: %s\n",
858 &cfgPtr->devnames[hosed_row][hosed_column][0]);
859 if (!force) {
860 /* we'll fail this component, as if there are
861 other major errors, we arn't forcing things
862 and we'll abort the config anyways */
863 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
864 raidPtr->Disks[hosed_row][hosed_column].status
865 = rf_ds_failed;
866 raidPtr->numFailures++;
867 raidPtr->status[hosed_row] = rf_rs_degraded;
868 }
869 }
870 } else {
871 too_fatal = 1;
872 }
873 if (cfgPtr->parityConfig == '0') {
874 /* We've identified two different mod counters.
875 RAID 0 can't cope with that, so we'll punt */
876 too_fatal = 1;
877 }
878 }
879
880 raidPtr->mod_counter = mod_number;
881
882 if (too_fatal) {
883 /* we've had both a serial number mismatch, and a mod_counter
884 mismatch -- and they involved two different components!!
885 Bail -- make things fail so that the user must force
886 the issue... */
887 hosed_row = -1;
888 hosed_column = -1;
889 }
890
891 if (num_ser > 2) {
892 printf("raid%d: Too many different serial numbers!\n",
893 raidPtr->raidid);
894 }
895
896 if (num_mod > 2) {
897 printf("raid%d: Too many different mod counters!\n",
898 raidPtr->raidid);
899 }
900
901 /* we start by assuming the parity will be good, and flee from
902 that notion at the slightest sign of trouble */
903
904 parity_good = RF_RAID_CLEAN;
905 for (r = 0; r < raidPtr->numRow; r++) {
906 for (c = 0; c < raidPtr->numCol; c++) {
907 dev_name = &cfgPtr->devnames[r][c][0];
908 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
909
910 if ((r == hosed_row) && (c == hosed_column)) {
911 printf("raid%d: Ignoring %s\n",
912 raidPtr->raidid, dev_name);
913 } else {
914 rf_print_label_status( raidPtr, r, c,
915 dev_name, ci_label );
916 if (rf_check_label_vitals( raidPtr, r, c,
917 dev_name, ci_label,
918 serial_number,
919 mod_number )) {
920 fatal_error = 1;
921 }
922 if (ci_label->clean != RF_RAID_CLEAN) {
923 parity_good = RF_RAID_DIRTY;
924 }
925 }
926 }
927 }
928 if (fatal_error) {
929 parity_good = RF_RAID_DIRTY;
930 }
931
932 /* we note the state of the parity */
933 raidPtr->parity_good = parity_good;
934
935 return(fatal_error);
936 }
937
938 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
939 int
940 rf_add_hot_spare(raidPtr, sparePtr)
941 RF_Raid_t *raidPtr;
942 RF_SingleComponent_t *sparePtr;
943 {
944 RF_RaidDisk_t *disks;
945 RF_DiskQueue_t *spareQueues;
946 int ret;
947 unsigned int bs;
948 int spare_number;
949
950 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
951 printf("Num col: %d\n",raidPtr->numCol);
952 if (raidPtr->numSpare >= RF_MAXSPARE) {
953 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
954 return(EINVAL);
955 }
956
957 RF_LOCK_MUTEX(raidPtr->mutex);
958
959 /* the beginning of the spares... */
960 disks = &raidPtr->Disks[0][raidPtr->numCol];
961
962 spare_number = raidPtr->numSpare;
963
964 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
965 &disks[spare_number], 0,
966 raidPtr->numCol + spare_number);
967
968 if (ret)
969 goto fail;
970 if (disks[spare_number].status != rf_ds_optimal) {
971 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
972 sparePtr->component_name);
973 ret=EINVAL;
974 goto fail;
975 } else {
976 disks[spare_number].status = rf_ds_spare;
977 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
978 disks[spare_number].devname,
979 (long int) disks[spare_number].numBlocks,
980 disks[spare_number].blockSize,
981 (long int) disks[spare_number].numBlocks *
982 disks[spare_number].blockSize / 1024 / 1024);
983 }
984
985
986 /* check sizes and block sizes on the spare disk */
987 bs = 1 << raidPtr->logBytesPerSector;
988 if (disks[spare_number].blockSize != bs) {
989 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
990 ret = EINVAL;
991 goto fail;
992 }
993 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
994 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
995 disks[spare_number].devname,
996 disks[spare_number].blockSize,
997 (long int) raidPtr->sectorsPerDisk);
998 ret = EINVAL;
999 goto fail;
1000 } else {
1001 if (disks[spare_number].numBlocks >
1002 raidPtr->sectorsPerDisk) {
1003 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1004 (long int) raidPtr->sectorsPerDisk);
1005
1006 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1007 }
1008 }
1009
1010 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1011 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1012 0, raidPtr->numCol + spare_number,
1013 raidPtr->Queues[0][0].qPtr, /* XXX */
1014 raidPtr->sectorsPerDisk,
1015 raidPtr->Disks[0][raidPtr->numCol + spare_number].dev,
1016 raidPtr->Queues[0][0].maxOutstanding, /* XXX */
1017 &raidPtr->shutdownList,
1018 raidPtr->cleanupList);
1019
1020
1021 raidPtr->numSpare++;
1022 RF_UNLOCK_MUTEX(raidPtr->mutex);
1023 return (0);
1024
1025 fail:
1026 RF_UNLOCK_MUTEX(raidPtr->mutex);
1027 return(ret);
1028 }
1029
1030 int
1031 rf_remove_hot_spare(raidPtr,sparePtr)
1032 RF_Raid_t *raidPtr;
1033 RF_SingleComponent_t *sparePtr;
1034 {
1035 int spare_number;
1036
1037
1038 if (raidPtr->numSpare==0) {
1039 printf("No spares to remove!\n");
1040 return(EINVAL);
1041 }
1042
1043 spare_number = sparePtr->column;
1044
1045 return(EINVAL); /* XXX not implemented yet */
1046 #if 0
1047 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1048 return(EINVAL);
1049 }
1050
1051 /* verify that this spare isn't in use... */
1052
1053
1054
1055
1056 /* it's gone.. */
1057
1058 raidPtr->numSpare--;
1059
1060 return(0);
1061 #endif
1062 }
1063
1064
1065