rf_disks.c revision 1.15 1 /* $NetBSD: rf_disks.c,v 1.15 2000/02/13 04:53:57 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91
/*
 * Debug print helpers: forward their arguments to printf() only when
 * rf_diskDebug is non-zero.  The digit in the name is the total number of
 * arguments, format string included.
 *
 * The do { } while (0) wrapper turns each expansion into exactly one
 * statement, so a use inside an unbraced if/else cannot capture the
 * caller's "else" branch.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94
95 /**************************************************************************
96 *
97 * initialize the disks comprising the array
98 *
99 * We want the spare disks to have regular row,col numbers so that we can
100 * easily substitue a spare for a failed disk. But, the driver code assumes
101 * throughout that the array contains numRow by numCol _non-spare_ disks, so
102 * it's not clear how to fit in the spares. This is an unfortunate holdover
103 * from raidSim. The quick and dirty fix is to make row zero bigger than the
104 * rest, and put all the spares in it. This probably needs to get changed
105 * eventually.
106 *
107 **************************************************************************/
108
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 RF_ShutdownList_t **listp;
112 RF_Raid_t *raidPtr;
113 RF_Config_t *cfgPtr;
114 {
115 RF_RaidDisk_t **disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t r, c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
125 (RF_RaidDisk_t **), raidPtr->cleanupList);
126 if (disks == NULL) {
127 ret = ENOMEM;
128 goto fail;
129 }
130 raidPtr->Disks = disks;
131
132 /* get space for the device-specific stuff... */
133 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
134 sizeof(struct raidcinfo *), (struct raidcinfo **),
135 raidPtr->cleanupList);
136 if (raidPtr->raid_cinfo == NULL) {
137 ret = ENOMEM;
138 goto fail;
139 }
140 for (r = 0; r < raidPtr->numRow; r++) {
141 numFailuresThisRow = 0;
142 /* We allocate RF_MAXSPARE on the first row so that we
143 have room to do hot-swapping of spares */
144 RF_CallocAndAdd(disks[r], raidPtr->numCol
145 + ((r == 0) ? RF_MAXSPARE : 0),
146 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
147 raidPtr->cleanupList);
148 if (disks[r] == NULL) {
149 ret = ENOMEM;
150 goto fail;
151 }
152 /* get more space for device specific stuff.. */
153 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
154 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
155 sizeof(struct raidcinfo), (struct raidcinfo *),
156 raidPtr->cleanupList);
157 if (raidPtr->raid_cinfo[r] == NULL) {
158 ret = ENOMEM;
159 goto fail;
160 }
161 for (c = 0; c < raidPtr->numCol; c++) {
162 ret = rf_ConfigureDisk(raidPtr,
163 &cfgPtr->devnames[r][c][0],
164 &disks[r][c], r, c);
165
166 if (ret)
167 goto fail;
168
169 if (disks[r][c].status == rf_ds_optimal) {
170 raidread_component_label(
171 raidPtr->raid_cinfo[r][c].ci_dev,
172 raidPtr->raid_cinfo[r][c].ci_vp,
173 &raidPtr->raid_cinfo[r][c].ci_label);
174 }
175
176 if (disks[r][c].status != rf_ds_optimal) {
177 numFailuresThisRow++;
178 } else {
179 if (disks[r][c].numBlocks < min_numblks)
180 min_numblks = disks[r][c].numBlocks;
181 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
182 r, c, disks[r][c].devname,
183 (long int) disks[r][c].numBlocks,
184 disks[r][c].blockSize,
185 (long int) disks[r][c].numBlocks *
186 disks[r][c].blockSize / 1024 / 1024);
187 }
188 }
189 /* XXX fix for n-fault tolerant */
190 /* XXX this should probably check to see how many failures
191 we can handle for this configuration! */
192 if (numFailuresThisRow > 0)
193 raidPtr->status[r] = rf_rs_degraded;
194 }
195
196 /* all disks must be the same size & have the same block size, bs must
197 * be a power of 2 */
198 bs = 0;
199 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
200 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
201 if (disks[r][c].status == rf_ds_optimal) {
202 bs = disks[r][c].blockSize;
203 foundone = 1;
204 }
205 }
206 }
207 if (!foundone) {
208 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
209 ret = EINVAL;
210 goto fail;
211 }
212 for (count = 0, i = 1; i; i <<= 1)
213 if (bs & i)
214 count++;
215 if (count != 1) {
216 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
217 ret = EINVAL;
218 goto fail;
219 }
220
221 if (rf_CheckLabels( raidPtr, cfgPtr )) {
222 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
223 if (force != 0) {
224 printf("raid%d: Fatal errors being ignored.\n",
225 raidPtr->raidid);
226 } else {
227 ret = EINVAL;
228 goto fail;
229 }
230 }
231
232 for (r = 0; r < raidPtr->numRow; r++) {
233 for (c = 0; c < raidPtr->numCol; c++) {
234 if (disks[r][c].status == rf_ds_optimal) {
235 if (disks[r][c].blockSize != bs) {
236 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
237 ret = EINVAL;
238 goto fail;
239 }
240 if (disks[r][c].numBlocks != min_numblks) {
241 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
242 r, c, (int) min_numblks);
243 disks[r][c].numBlocks = min_numblks;
244 }
245 }
246 }
247 }
248
249 raidPtr->sectorsPerDisk = min_numblks;
250 raidPtr->logBytesPerSector = ffs(bs) - 1;
251 raidPtr->bytesPerSector = bs;
252 raidPtr->sectorMask = bs - 1;
253 return (0);
254
255 fail:
256
257 rf_UnconfigureVnodes( raidPtr );
258
259 return (ret);
260 }
261
262
263 /****************************************************************************
264 * set up the data structures describing the spare disks in the array
265 * recall from the above comment that the spare disk descriptors are stored
266 * in row zero, which is specially expanded to hold them.
267 ****************************************************************************/
268 int
269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
270 RF_ShutdownList_t ** listp;
271 RF_Raid_t * raidPtr;
272 RF_Config_t * cfgPtr;
273 {
274 int i, ret;
275 unsigned int bs;
276 RF_RaidDisk_t *disks;
277 int num_spares_done;
278
279 num_spares_done = 0;
280
281 /* The space for the spares should have already been allocated by
282 * ConfigureDisks() */
283
284 disks = &raidPtr->Disks[0][raidPtr->numCol];
285 for (i = 0; i < raidPtr->numSpare; i++) {
286 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
287 &disks[i], 0, raidPtr->numCol + i);
288 if (ret)
289 goto fail;
290 if (disks[i].status != rf_ds_optimal) {
291 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
292 &cfgPtr->spare_names[i][0]);
293 } else {
294 disks[i].status = rf_ds_spare; /* change status to
295 * spare */
296 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
297 disks[i].devname,
298 (long int) disks[i].numBlocks, disks[i].blockSize,
299 (long int) disks[i].numBlocks *
300 disks[i].blockSize / 1024 / 1024);
301 }
302 num_spares_done++;
303 }
304
305 /* check sizes and block sizes on spare disks */
306 bs = 1 << raidPtr->logBytesPerSector;
307 for (i = 0; i < raidPtr->numSpare; i++) {
308 if (disks[i].blockSize != bs) {
309 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
310 ret = EINVAL;
311 goto fail;
312 }
313 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
314 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
315 disks[i].devname, disks[i].blockSize,
316 (long int) raidPtr->sectorsPerDisk);
317 ret = EINVAL;
318 goto fail;
319 } else
320 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
321 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
322
323 disks[i].numBlocks = raidPtr->sectorsPerDisk;
324 }
325 }
326
327 return (0);
328
329 fail:
330
331 /* Release the hold on the main components. We've failed to allocate
332 * a spare, and since we're failing, we need to free things..
333
334 XXX failing to allocate a spare is *not* that big of a deal...
335 We *can* survive without it, if need be, esp. if we get hot
336 adding working.
337
338 If we don't fail out here, then we need a way to remove this spare...
339 that should be easier to do here than if we are "live"...
340
341 */
342
343 rf_UnconfigureVnodes( raidPtr );
344
345 return (ret);
346 }
347
348 static int
349 rf_AllocDiskStructures(raidPtr, cfgPtr)
350 RF_Raid_t *raidPtr;
351 RF_Config_t *cfgPtr;
352 {
353 RF_RaidDisk_t **disks;
354 int ret;
355 int r;
356
357 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
358 (RF_RaidDisk_t **), raidPtr->cleanupList);
359 if (disks == NULL) {
360 ret = ENOMEM;
361 goto fail;
362 }
363 raidPtr->Disks = disks;
364 /* get space for the device-specific stuff... */
365 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
366 sizeof(struct raidcinfo *), (struct raidcinfo **),
367 raidPtr->cleanupList);
368 if (raidPtr->raid_cinfo == NULL) {
369 ret = ENOMEM;
370 goto fail;
371 }
372
373 for (r = 0; r < raidPtr->numRow; r++) {
374 /* We allocate RF_MAXSPARE on the first row so that we
375 have room to do hot-swapping of spares */
376 RF_CallocAndAdd(disks[r], raidPtr->numCol
377 + ((r == 0) ? RF_MAXSPARE : 0),
378 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
379 raidPtr->cleanupList);
380 if (disks[r] == NULL) {
381 ret = ENOMEM;
382 goto fail;
383 }
384 /* get more space for device specific stuff.. */
385 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
386 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
387 sizeof(struct raidcinfo), (struct raidcinfo *),
388 raidPtr->cleanupList);
389 if (raidPtr->raid_cinfo[r] == NULL) {
390 ret = ENOMEM;
391 goto fail;
392 }
393 }
394 return(0);
395 fail:
396 rf_UnconfigureVnodes( raidPtr );
397
398 return(ret);
399 }
400
401
402 /* configure a single disk during auto-configuration at boot */
403 int
404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
405 RF_Raid_t *raidPtr;
406 RF_Config_t *cfgPtr;
407 RF_AutoConfig_t *auto_config;
408 {
409 RF_RaidDisk_t **disks;
410 RF_RaidDisk_t *diskPtr;
411 RF_RowCol_t r, c;
412 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
413 int bs, ret;
414 int numFailuresThisRow;
415 int force;
416 RF_AutoConfig_t *ac;
417
418 #if DEBUG
419 printf("Starting autoconfiguration of RAID set...\n");
420 #endif
421 force = cfgPtr->force;
422
423 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
424 if (ret)
425 goto fail;
426
427 disks = raidPtr->Disks;
428
429 for (r = 0; r < raidPtr->numRow; r++) {
430 numFailuresThisRow = 0;
431 for (c = 0; c < raidPtr->numCol; c++) {
432 diskPtr = &disks[r][c];
433
434 /* find this row/col in the autoconfig */
435 #if DEBUG
436 printf("Looking for %d,%d in autoconfig\n",r,c);
437 #endif
438 ac = auto_config;
439 while(ac!=NULL) {
440 if (ac->clabel==NULL) {
441 /* big-time bad news. */
442 goto fail;
443 }
444 if ((ac->clabel->row == r) &&
445 (ac->clabel->column == c)) {
446 /* it's this one... */
447 #if DEBUG
448 printf("Found: %s at %d,%d\n",
449 ac->devname,r,c);
450 #endif
451 break;
452 }
453 ac=ac->next;
454 }
455
456 if (ac!=NULL) {
457 /* Found it. Configure it.. */
458 diskPtr->blockSize = ac->clabel->blockSize;
459 diskPtr->numBlocks = ac->clabel->numBlocks;
460 /* Note: rf_protectedSectors is already
461 factored into numBlocks here */
462 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
463 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
464
465 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
466 ac->clabel, sizeof(*ac->clabel));
467 sprintf(diskPtr->devname, "/dev/%s",
468 ac->devname);
469 diskPtr->dev = ac->dev;
470
471 /*
472 * we allow the user to specify that
473 * only a fraction of the disks should
474 * be used this is just for debug: it
475 * speeds up the parity scan
476 */
477
478 diskPtr->numBlocks = diskPtr->numBlocks *
479 rf_sizePercentage / 100;
480
481 /* XXX these will get set multiple times,
482 but since we're autoconfiguring, they'd
483 better be always the same each time!
484 If not, this is the least of your worries */
485
486 bs = diskPtr->blockSize;
487 min_numblks = diskPtr->numBlocks;
488 } else {
489 /* Didn't find it!! Component must be dead */
490 disks[r][c].status = rf_ds_failed;
491 numFailuresThisRow++;
492 }
493 }
494 /* XXX fix for n-fault tolerant */
495 /* XXX this should probably check to see how many failures
496 we can handle for this configuration! */
497 if (numFailuresThisRow > 0)
498 raidPtr->status[r] = rf_rs_degraded;
499 }
500
501 raidPtr->sectorsPerDisk = min_numblks;
502 raidPtr->logBytesPerSector = ffs(bs) - 1;
503 raidPtr->bytesPerSector = bs;
504 raidPtr->sectorMask = bs - 1;
505 return (0);
506
507 fail:
508
509 rf_UnconfigureVnodes( raidPtr );
510
511 return (ret);
512
513 }
514
515 /* configure a single disk in the array */
516 int
517 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
518 RF_Raid_t *raidPtr;
519 char *buf;
520 RF_RaidDisk_t *diskPtr;
521 RF_RowCol_t row;
522 RF_RowCol_t col;
523 {
524 char *p;
525 int retcode;
526
527 struct partinfo dpart;
528 struct vnode *vp;
529 struct vattr va;
530 struct proc *proc;
531 int error;
532
533 retcode = 0;
534 p = rf_find_non_white(buf);
535 if (p[strlen(p) - 1] == '\n') {
536 /* strip off the newline */
537 p[strlen(p) - 1] = '\0';
538 }
539 (void) strcpy(diskPtr->devname, p);
540
541 proc = raidPtr->engine_thread;
542
543 /* Let's start by claiming the component is fine and well... */
544 diskPtr->status = rf_ds_optimal;
545
546 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
547 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
548
549 error = raidlookup(diskPtr->devname, proc, &vp);
550 if (error) {
551 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
552 if (error == ENXIO) {
553 /* the component isn't there... must be dead :-( */
554 diskPtr->status = rf_ds_failed;
555 } else {
556 return (error);
557 }
558 }
559 if (diskPtr->status == rf_ds_optimal) {
560
561 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
562 return (error);
563 }
564 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
565 FREAD, proc->p_ucred, proc);
566 if (error) {
567 return (error);
568 }
569
570 diskPtr->blockSize = dpart.disklab->d_secsize;
571
572 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
573 diskPtr->partitionSize = dpart.part->p_size;
574
575 raidPtr->raid_cinfo[row][col].ci_vp = vp;
576 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
577
578 diskPtr->dev = va.va_rdev;
579
580 /* we allow the user to specify that only a fraction of the
581 * disks should be used this is just for debug: it speeds up
582 * the parity scan */
583 diskPtr->numBlocks = diskPtr->numBlocks *
584 rf_sizePercentage / 100;
585 }
586 return (0);
587 }
588
589 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
590 RF_ComponentLabel_t *);
591
592 static void
593 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
594 RF_Raid_t *raidPtr;
595 int row;
596 int column;
597 char *dev_name;
598 RF_ComponentLabel_t *ci_label;
599 {
600
601 printf("raid%d: Component %s being configured at row: %d col: %d\n",
602 raidPtr->raidid, dev_name, row, column );
603 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
604 ci_label->row, ci_label->column,
605 ci_label->num_rows, ci_label->num_columns);
606 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
607 ci_label->version, ci_label->serial_number,
608 ci_label->mod_counter);
609 printf(" Clean: %s Status: %d\n",
610 ci_label->clean ? "Yes" : "No", ci_label->status );
611 }
612
613 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
614 RF_ComponentLabel_t *, int, int );
615 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
616 serial_number, mod_counter )
617 RF_Raid_t *raidPtr;
618 int row;
619 int column;
620 char *dev_name;
621 RF_ComponentLabel_t *ci_label;
622 int serial_number;
623 int mod_counter;
624 {
625 int fatal_error = 0;
626
627 if (serial_number != ci_label->serial_number) {
628 printf("%s has a different serial number: %d %d\n",
629 dev_name, serial_number, ci_label->serial_number);
630 fatal_error = 1;
631 }
632 if (mod_counter != ci_label->mod_counter) {
633 printf("%s has a different modfication count: %d %d\n",
634 dev_name, mod_counter, ci_label->mod_counter);
635 }
636
637 if (row != ci_label->row) {
638 printf("Row out of alignment for: %s\n", dev_name);
639 fatal_error = 1;
640 }
641 if (column != ci_label->column) {
642 printf("Column out of alignment for: %s\n", dev_name);
643 fatal_error = 1;
644 }
645 if (raidPtr->numRow != ci_label->num_rows) {
646 printf("Number of rows do not match for: %s\n", dev_name);
647 fatal_error = 1;
648 }
649 if (raidPtr->numCol != ci_label->num_columns) {
650 printf("Number of columns do not match for: %s\n", dev_name);
651 fatal_error = 1;
652 }
653 if (ci_label->clean == 0) {
654 /* it's not clean, but that's not fatal */
655 printf("%s is not clean!\n", dev_name);
656 }
657 return(fatal_error);
658 }
659
660
661 /*
662
663 rf_CheckLabels() - check all the component labels for consistency.
664 Return an error if there is anything major amiss.
665
666 */
667
668 int
669 rf_CheckLabels( raidPtr, cfgPtr )
670 RF_Raid_t *raidPtr;
671 RF_Config_t *cfgPtr;
672 {
673 int r,c;
674 char *dev_name;
675 RF_ComponentLabel_t *ci_label;
676 int serial_number = 0;
677 int mod_number = 0;
678 int fatal_error = 0;
679 int mod_values[4];
680 int mod_count[4];
681 int ser_values[4];
682 int ser_count[4];
683 int num_ser;
684 int num_mod;
685 int i;
686 int found;
687 int hosed_row;
688 int hosed_column;
689 int too_fatal;
690 int parity_good;
691 int force;
692
693 hosed_row = -1;
694 hosed_column = -1;
695 too_fatal = 0;
696 force = cfgPtr->force;
697
698 /*
699 We're going to try to be a little intelligent here. If one
700 component's label is bogus, and we can identify that it's the
701 *only* one that's gone, we'll mark it as "failed" and allow
702 the configuration to proceed. This will be the *only* case
703 that we'll proceed if there would be (otherwise) fatal errors.
704
705 Basically we simply keep a count of how many components had
706 what serial number. If all but one agree, we simply mark
707 the disagreeing component as being failed, and allow
708 things to come up "normally".
709
710 We do this first for serial numbers, and then for "mod_counter".
711
712 */
713
714 num_ser = 0;
715 num_mod = 0;
716 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
717 for (c = 0; c < raidPtr->numCol; c++) {
718 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
719 found=0;
720 for(i=0;i<num_ser;i++) {
721 if (ser_values[i] == ci_label->serial_number) {
722 ser_count[i]++;
723 found=1;
724 break;
725 }
726 }
727 if (!found) {
728 ser_values[num_ser] = ci_label->serial_number;
729 ser_count[num_ser] = 1;
730 num_ser++;
731 if (num_ser>2) {
732 fatal_error = 1;
733 break;
734 }
735 }
736 found=0;
737 for(i=0;i<num_mod;i++) {
738 if (mod_values[i] == ci_label->mod_counter) {
739 mod_count[i]++;
740 found=1;
741 break;
742 }
743 }
744 if (!found) {
745 mod_values[num_mod] = ci_label->mod_counter;
746 mod_count[num_mod] = 1;
747 num_mod++;
748 if (num_mod>2) {
749 fatal_error = 1;
750 break;
751 }
752 }
753 }
754 }
755 #if DEBUG
756 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
757 for(i=0;i<num_ser;i++) {
758 printf("%d %d\n", ser_values[i], ser_count[i]);
759 }
760 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
761 for(i=0;i<num_mod;i++) {
762 printf("%d %d\n", mod_values[i], mod_count[i]);
763 }
764 #endif
765 serial_number = ser_values[0];
766 if (num_ser == 2) {
767 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
768 /* Locate the maverick component */
769 if (ser_count[1] > ser_count[0]) {
770 serial_number = ser_values[1];
771 }
772 for (r = 0; r < raidPtr->numRow; r++) {
773 for (c = 0; c < raidPtr->numCol; c++) {
774 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
775 if (serial_number !=
776 ci_label->serial_number) {
777 hosed_row = r;
778 hosed_column = c;
779 break;
780 }
781 }
782 }
783 printf("Hosed component: %s\n",
784 &cfgPtr->devnames[hosed_row][hosed_column][0]);
785 if (!force) {
786 /* we'll fail this component, as if there are
787 other major errors, we arn't forcing things
788 and we'll abort the config anyways */
789 raidPtr->Disks[hosed_row][hosed_column].status
790 = rf_ds_failed;
791 raidPtr->numFailures++;
792 raidPtr->status[hosed_row] = rf_rs_degraded;
793 }
794 } else {
795 too_fatal = 1;
796 }
797 if (cfgPtr->parityConfig == '0') {
798 /* We've identified two different serial numbers.
799 RAID 0 can't cope with that, so we'll punt */
800 too_fatal = 1;
801 }
802
803 }
804
805 /* record the serial number for later. If we bail later, setting
806 this doesn't matter, otherwise we've got the best guess at the
807 correct serial number */
808 raidPtr->serial_number = serial_number;
809
810 mod_number = mod_values[0];
811 if (num_mod == 2) {
812 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
813 /* Locate the maverick component */
814 if (mod_count[1] > mod_count[0]) {
815 mod_number = mod_values[1];
816 } else if (mod_count[1] < mod_count[0]) {
817 mod_number = mod_values[0];
818 } else {
819 /* counts of different modification values
820 are the same. Assume greater value is
821 the correct one, all other things
822 considered */
823 if (mod_values[0] > mod_values[1]) {
824 mod_number = mod_values[0];
825 } else {
826 mod_number = mod_values[1];
827 }
828
829 }
830 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
831 for (c = 0; c < raidPtr->numCol; c++) {
832 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
833 if (mod_number !=
834 ci_label->mod_counter) {
835 if ( ( hosed_row == r ) &&
836 ( hosed_column == c )) {
837 /* same one. Can
838 deal with it. */
839 } else {
840 hosed_row = r;
841 hosed_column = c;
842 if (num_ser != 1) {
843 too_fatal = 1;
844 break;
845 }
846 }
847 }
848 }
849 }
850 printf("Hosed component: %s\n",
851 &cfgPtr->devnames[hosed_row][hosed_column][0]);
852 if (!force) {
853 /* we'll fail this component, as if there are
854 other major errors, we arn't forcing things
855 and we'll abort the config anyways */
856 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
857 raidPtr->Disks[hosed_row][hosed_column].status
858 = rf_ds_failed;
859 raidPtr->numFailures++;
860 raidPtr->status[hosed_row] = rf_rs_degraded;
861 }
862 }
863 } else {
864 too_fatal = 1;
865 }
866 if (cfgPtr->parityConfig == '0') {
867 /* We've identified two different mod counters.
868 RAID 0 can't cope with that, so we'll punt */
869 too_fatal = 1;
870 }
871 }
872
873 raidPtr->mod_counter = mod_number;
874
875 if (too_fatal) {
876 /* we've had both a serial number mismatch, and a mod_counter
877 mismatch -- and they involved two different components!!
878 Bail -- make things fail so that the user must force
879 the issue... */
880 hosed_row = -1;
881 hosed_column = -1;
882 }
883
884 if (num_ser > 2) {
885 printf("raid%d: Too many different serial numbers!\n",
886 raidPtr->raidid);
887 }
888
889 if (num_mod > 2) {
890 printf("raid%d: Too many different mod counters!\n",
891 raidPtr->raidid);
892 }
893
894 /* we start by assuming the parity will be good, and flee from
895 that notion at the slightest sign of trouble */
896
897 parity_good = RF_RAID_CLEAN;
898 for (r = 0; r < raidPtr->numRow; r++) {
899 for (c = 0; c < raidPtr->numCol; c++) {
900 dev_name = &cfgPtr->devnames[r][c][0];
901 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
902
903 if ((r == hosed_row) && (c == hosed_column)) {
904 printf("raid%d: Ignoring %s\n",
905 raidPtr->raidid, dev_name);
906 } else {
907 rf_print_label_status( raidPtr, r, c,
908 dev_name, ci_label );
909 if (rf_check_label_vitals( raidPtr, r, c,
910 dev_name, ci_label,
911 serial_number,
912 mod_number )) {
913 fatal_error = 1;
914 }
915 if (ci_label->clean != RF_RAID_CLEAN) {
916 parity_good = RF_RAID_DIRTY;
917 }
918 }
919 }
920 }
921 if (fatal_error) {
922 parity_good = RF_RAID_DIRTY;
923 }
924
925 /* we note the state of the parity */
926 raidPtr->parity_good = parity_good;
927
928 return(fatal_error);
929 }
930
931 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
932 int
933 rf_add_hot_spare(raidPtr, sparePtr)
934 RF_Raid_t *raidPtr;
935 RF_SingleComponent_t *sparePtr;
936 {
937 RF_RaidDisk_t *disks;
938 RF_DiskQueue_t *spareQueues;
939 int ret;
940 unsigned int bs;
941 int spare_number;
942
943 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
944 printf("Num col: %d\n",raidPtr->numCol);
945 if (raidPtr->numSpare >= RF_MAXSPARE) {
946 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
947 return(EINVAL);
948 }
949
950 RF_LOCK_MUTEX(raidPtr->mutex);
951
952 /* the beginning of the spares... */
953 disks = &raidPtr->Disks[0][raidPtr->numCol];
954
955 spare_number = raidPtr->numSpare;
956
957 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
958 &disks[spare_number], 0,
959 raidPtr->numCol + spare_number);
960
961 if (ret)
962 goto fail;
963 if (disks[spare_number].status != rf_ds_optimal) {
964 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
965 sparePtr->component_name);
966 ret=EINVAL;
967 goto fail;
968 } else {
969 disks[spare_number].status = rf_ds_spare;
970 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
971 disks[spare_number].devname,
972 (long int) disks[spare_number].numBlocks,
973 disks[spare_number].blockSize,
974 (long int) disks[spare_number].numBlocks *
975 disks[spare_number].blockSize / 1024 / 1024);
976 }
977
978
979 /* check sizes and block sizes on the spare disk */
980 bs = 1 << raidPtr->logBytesPerSector;
981 if (disks[spare_number].blockSize != bs) {
982 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
983 ret = EINVAL;
984 goto fail;
985 }
986 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
987 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
988 disks[spare_number].devname,
989 disks[spare_number].blockSize,
990 (long int) raidPtr->sectorsPerDisk);
991 ret = EINVAL;
992 goto fail;
993 } else {
994 if (disks[spare_number].numBlocks >
995 raidPtr->sectorsPerDisk) {
996 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
997 (long int) raidPtr->sectorsPerDisk);
998
999 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1000 }
1001 }
1002
1003 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1004 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1005 0, raidPtr->numCol + spare_number,
1006 raidPtr->Queues[0][0].qPtr, /* XXX */
1007 raidPtr->sectorsPerDisk,
1008 raidPtr->Disks[0][raidPtr->numCol + spare_number].dev,
1009 raidPtr->Queues[0][0].maxOutstanding, /* XXX */
1010 &raidPtr->shutdownList,
1011 raidPtr->cleanupList);
1012
1013
1014 raidPtr->numSpare++;
1015 RF_UNLOCK_MUTEX(raidPtr->mutex);
1016 return (0);
1017
1018 fail:
1019 RF_UNLOCK_MUTEX(raidPtr->mutex);
1020 return(ret);
1021 }
1022
1023 int
1024 rf_remove_hot_spare(raidPtr,sparePtr)
1025 RF_Raid_t *raidPtr;
1026 RF_SingleComponent_t *sparePtr;
1027 {
1028 int spare_number;
1029
1030
1031 if (raidPtr->numSpare==0) {
1032 printf("No spares to remove!\n");
1033 return(EINVAL);
1034 }
1035
1036 spare_number = sparePtr->column;
1037
1038 return(EINVAL); /* XXX not implemented yet */
1039 #if 0
1040 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1041 return(EINVAL);
1042 }
1043
1044 /* verify that this spare isn't in use... */
1045
1046
1047
1048
1049 /* it's gone.. */
1050
1051 raidPtr->numSpare--;
1052
1053 return(0);
1054 #endif
1055 }
1056
1057
1058