rf_disks.c revision 1.18 1 /* $NetBSD: rf_disks.c,v 1.18 2000/02/24 02:55:05 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91
/*
 * Debug print helpers: forward the arguments to printf() only when
 * rf_diskDebug is non-zero.  Each macro is wrapped in do { } while (0) so
 * that it expands to exactly one statement and cannot swallow an `else`
 * that follows the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94
95 /**************************************************************************
96 *
97 * initialize the disks comprising the array
98 *
99 * We want the spare disks to have regular row,col numbers so that we can
100 * easily substitue a spare for a failed disk. But, the driver code assumes
101 * throughout that the array contains numRow by numCol _non-spare_ disks, so
102 * it's not clear how to fit in the spares. This is an unfortunate holdover
103 * from raidSim. The quick and dirty fix is to make row zero bigger than the
104 * rest, and put all the spares in it. This probably needs to get changed
105 * eventually.
106 *
107 **************************************************************************/
108
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 RF_ShutdownList_t **listp;
112 RF_Raid_t *raidPtr;
113 RF_Config_t *cfgPtr;
114 {
115 RF_RaidDisk_t **disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t r, c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
125 (RF_RaidDisk_t **), raidPtr->cleanupList);
126 if (disks == NULL) {
127 ret = ENOMEM;
128 goto fail;
129 }
130 raidPtr->Disks = disks;
131
132 /* get space for the device-specific stuff... */
133 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
134 sizeof(struct raidcinfo *), (struct raidcinfo **),
135 raidPtr->cleanupList);
136 if (raidPtr->raid_cinfo == NULL) {
137 ret = ENOMEM;
138 goto fail;
139 }
140 for (r = 0; r < raidPtr->numRow; r++) {
141 numFailuresThisRow = 0;
142 /* We allocate RF_MAXSPARE on the first row so that we
143 have room to do hot-swapping of spares */
144 RF_CallocAndAdd(disks[r], raidPtr->numCol
145 + ((r == 0) ? RF_MAXSPARE : 0),
146 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
147 raidPtr->cleanupList);
148 if (disks[r] == NULL) {
149 ret = ENOMEM;
150 goto fail;
151 }
152 /* get more space for device specific stuff.. */
153 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
154 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
155 sizeof(struct raidcinfo), (struct raidcinfo *),
156 raidPtr->cleanupList);
157 if (raidPtr->raid_cinfo[r] == NULL) {
158 ret = ENOMEM;
159 goto fail;
160 }
161 for (c = 0; c < raidPtr->numCol; c++) {
162 ret = rf_ConfigureDisk(raidPtr,
163 &cfgPtr->devnames[r][c][0],
164 &disks[r][c], r, c);
165
166 if (ret)
167 goto fail;
168
169 if (disks[r][c].status == rf_ds_optimal) {
170 raidread_component_label(
171 raidPtr->raid_cinfo[r][c].ci_dev,
172 raidPtr->raid_cinfo[r][c].ci_vp,
173 &raidPtr->raid_cinfo[r][c].ci_label);
174 }
175
176 if (disks[r][c].status != rf_ds_optimal) {
177 numFailuresThisRow++;
178 } else {
179 if (disks[r][c].numBlocks < min_numblks)
180 min_numblks = disks[r][c].numBlocks;
181 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
182 r, c, disks[r][c].devname,
183 (long int) disks[r][c].numBlocks,
184 disks[r][c].blockSize,
185 (long int) disks[r][c].numBlocks *
186 disks[r][c].blockSize / 1024 / 1024);
187 }
188 }
189 /* XXX fix for n-fault tolerant */
190 /* XXX this should probably check to see how many failures
191 we can handle for this configuration! */
192 if (numFailuresThisRow > 0)
193 raidPtr->status[r] = rf_rs_degraded;
194 }
195
196 /* all disks must be the same size & have the same block size, bs must
197 * be a power of 2 */
198 bs = 0;
199 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
200 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
201 if (disks[r][c].status == rf_ds_optimal) {
202 bs = disks[r][c].blockSize;
203 foundone = 1;
204 }
205 }
206 }
207 if (!foundone) {
208 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
209 ret = EINVAL;
210 goto fail;
211 }
212 for (count = 0, i = 1; i; i <<= 1)
213 if (bs & i)
214 count++;
215 if (count != 1) {
216 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
217 ret = EINVAL;
218 goto fail;
219 }
220
221 if (rf_CheckLabels( raidPtr, cfgPtr )) {
222 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
223 if (force != 0) {
224 printf("raid%d: Fatal errors being ignored.\n",
225 raidPtr->raidid);
226 } else {
227 ret = EINVAL;
228 goto fail;
229 }
230 }
231
232 for (r = 0; r < raidPtr->numRow; r++) {
233 for (c = 0; c < raidPtr->numCol; c++) {
234 if (disks[r][c].status == rf_ds_optimal) {
235 if (disks[r][c].blockSize != bs) {
236 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
237 ret = EINVAL;
238 goto fail;
239 }
240 if (disks[r][c].numBlocks != min_numblks) {
241 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
242 r, c, (int) min_numblks);
243 disks[r][c].numBlocks = min_numblks;
244 }
245 }
246 }
247 }
248
249 raidPtr->sectorsPerDisk = min_numblks;
250 raidPtr->logBytesPerSector = ffs(bs) - 1;
251 raidPtr->bytesPerSector = bs;
252 raidPtr->sectorMask = bs - 1;
253 return (0);
254
255 fail:
256
257 rf_UnconfigureVnodes( raidPtr );
258
259 return (ret);
260 }
261
262
263 /****************************************************************************
264 * set up the data structures describing the spare disks in the array
265 * recall from the above comment that the spare disk descriptors are stored
266 * in row zero, which is specially expanded to hold them.
267 ****************************************************************************/
268 int
269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
270 RF_ShutdownList_t ** listp;
271 RF_Raid_t * raidPtr;
272 RF_Config_t * cfgPtr;
273 {
274 int i, ret;
275 unsigned int bs;
276 RF_RaidDisk_t *disks;
277 int num_spares_done;
278
279 num_spares_done = 0;
280
281 /* The space for the spares should have already been allocated by
282 * ConfigureDisks() */
283
284 disks = &raidPtr->Disks[0][raidPtr->numCol];
285 for (i = 0; i < raidPtr->numSpare; i++) {
286 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
287 &disks[i], 0, raidPtr->numCol + i);
288 if (ret)
289 goto fail;
290 if (disks[i].status != rf_ds_optimal) {
291 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
292 &cfgPtr->spare_names[i][0]);
293 } else {
294 disks[i].status = rf_ds_spare; /* change status to
295 * spare */
296 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
297 disks[i].devname,
298 (long int) disks[i].numBlocks, disks[i].blockSize,
299 (long int) disks[i].numBlocks *
300 disks[i].blockSize / 1024 / 1024);
301 }
302 num_spares_done++;
303 }
304
305 /* check sizes and block sizes on spare disks */
306 bs = 1 << raidPtr->logBytesPerSector;
307 for (i = 0; i < raidPtr->numSpare; i++) {
308 if (disks[i].blockSize != bs) {
309 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
310 ret = EINVAL;
311 goto fail;
312 }
313 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
314 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
315 disks[i].devname, disks[i].blockSize,
316 (long int) raidPtr->sectorsPerDisk);
317 ret = EINVAL;
318 goto fail;
319 } else
320 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
321 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
322
323 disks[i].numBlocks = raidPtr->sectorsPerDisk;
324 }
325 }
326
327 return (0);
328
329 fail:
330
331 /* Release the hold on the main components. We've failed to allocate
332 * a spare, and since we're failing, we need to free things..
333
334 XXX failing to allocate a spare is *not* that big of a deal...
335 We *can* survive without it, if need be, esp. if we get hot
336 adding working.
337
338 If we don't fail out here, then we need a way to remove this spare...
339 that should be easier to do here than if we are "live"...
340
341 */
342
343 rf_UnconfigureVnodes( raidPtr );
344
345 return (ret);
346 }
347
348 static int
349 rf_AllocDiskStructures(raidPtr, cfgPtr)
350 RF_Raid_t *raidPtr;
351 RF_Config_t *cfgPtr;
352 {
353 RF_RaidDisk_t **disks;
354 int ret;
355 int r;
356
357 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
358 (RF_RaidDisk_t **), raidPtr->cleanupList);
359 if (disks == NULL) {
360 ret = ENOMEM;
361 goto fail;
362 }
363 raidPtr->Disks = disks;
364 /* get space for the device-specific stuff... */
365 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
366 sizeof(struct raidcinfo *), (struct raidcinfo **),
367 raidPtr->cleanupList);
368 if (raidPtr->raid_cinfo == NULL) {
369 ret = ENOMEM;
370 goto fail;
371 }
372
373 for (r = 0; r < raidPtr->numRow; r++) {
374 /* We allocate RF_MAXSPARE on the first row so that we
375 have room to do hot-swapping of spares */
376 RF_CallocAndAdd(disks[r], raidPtr->numCol
377 + ((r == 0) ? RF_MAXSPARE : 0),
378 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
379 raidPtr->cleanupList);
380 if (disks[r] == NULL) {
381 ret = ENOMEM;
382 goto fail;
383 }
384 /* get more space for device specific stuff.. */
385 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
386 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
387 sizeof(struct raidcinfo), (struct raidcinfo *),
388 raidPtr->cleanupList);
389 if (raidPtr->raid_cinfo[r] == NULL) {
390 ret = ENOMEM;
391 goto fail;
392 }
393 }
394 return(0);
395 fail:
396 rf_UnconfigureVnodes( raidPtr );
397
398 return(ret);
399 }
400
401
402 /* configure a single disk during auto-configuration at boot */
403 int
404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
405 RF_Raid_t *raidPtr;
406 RF_Config_t *cfgPtr;
407 RF_AutoConfig_t *auto_config;
408 {
409 RF_RaidDisk_t **disks;
410 RF_RaidDisk_t *diskPtr;
411 RF_RowCol_t r, c;
412 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
413 int bs, ret;
414 int numFailuresThisRow;
415 int force;
416 RF_AutoConfig_t *ac;
417 int parity_good;
418
419 #if DEBUG
420 printf("Starting autoconfiguration of RAID set...\n");
421 #endif
422 force = cfgPtr->force;
423
424 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
425 if (ret)
426 goto fail;
427
428 disks = raidPtr->Disks;
429
430 /* assume the parity will be fine.. */
431 parity_good = RF_RAID_CLEAN;
432
433 for (r = 0; r < raidPtr->numRow; r++) {
434 numFailuresThisRow = 0;
435 for (c = 0; c < raidPtr->numCol; c++) {
436 diskPtr = &disks[r][c];
437
438 /* find this row/col in the autoconfig */
439 #if DEBUG
440 printf("Looking for %d,%d in autoconfig\n",r,c);
441 #endif
442 ac = auto_config;
443 while(ac!=NULL) {
444 if (ac->clabel==NULL) {
445 /* big-time bad news. */
446 goto fail;
447 }
448 if ((ac->clabel->row == r) &&
449 (ac->clabel->column == c)) {
450 /* it's this one... */
451 #if DEBUG
452 printf("Found: %s at %d,%d\n",
453 ac->devname,r,c);
454 #endif
455 break;
456 }
457 ac=ac->next;
458 }
459
460 if (ac!=NULL) {
461 /* Found it. Configure it.. */
462 diskPtr->blockSize = ac->clabel->blockSize;
463 diskPtr->numBlocks = ac->clabel->numBlocks;
464 /* Note: rf_protectedSectors is already
465 factored into numBlocks here */
466 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
467 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
468
469 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
470 ac->clabel, sizeof(*ac->clabel));
471 sprintf(diskPtr->devname, "/dev/%s",
472 ac->devname);
473
474 /* note the fact that this component was
475 autoconfigured. You'll need this info
476 later. Trust me :) */
477 diskPtr->auto_configured = 1;
478 diskPtr->dev = ac->dev;
479
480 /*
481 * we allow the user to specify that
482 * only a fraction of the disks should
483 * be used this is just for debug: it
484 * speeds up the parity scan
485 */
486
487 diskPtr->numBlocks = diskPtr->numBlocks *
488 rf_sizePercentage / 100;
489
490 /* XXX these will get set multiple times,
491 but since we're autoconfiguring, they'd
492 better be always the same each time!
493 If not, this is the least of your worries */
494
495 bs = diskPtr->blockSize;
496 min_numblks = diskPtr->numBlocks;
497
498 /* this gets done multiple times, but that's
499 fine -- the serial number will be the same
500 for all components, guaranteed */
501 raidPtr->serial_number =
502 ac->clabel->serial_number;
503
504 if (ac->clabel->clean != RF_RAID_CLEAN) {
505 parity_good = RF_RAID_DIRTY;
506 }
507
508 } else {
509 /* Didn't find it!! Component must be dead */
510 disks[r][c].status = rf_ds_failed;
511 numFailuresThisRow++;
512 }
513 }
514 /* XXX fix for n-fault tolerant */
515 /* XXX this should probably check to see how many failures
516 we can handle for this configuration! */
517 if (numFailuresThisRow > 0)
518 raidPtr->status[r] = rf_rs_degraded;
519 }
520
521 /* note the state of the parity, if any */
522 raidPtr->parity_good = parity_good;
523 raidPtr->sectorsPerDisk = min_numblks;
524 raidPtr->logBytesPerSector = ffs(bs) - 1;
525 raidPtr->bytesPerSector = bs;
526 raidPtr->sectorMask = bs - 1;
527 return (0);
528
529 fail:
530
531 rf_UnconfigureVnodes( raidPtr );
532
533 return (ret);
534
535 }
536
537 /* configure a single disk in the array */
538 int
539 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
540 RF_Raid_t *raidPtr;
541 char *buf;
542 RF_RaidDisk_t *diskPtr;
543 RF_RowCol_t row;
544 RF_RowCol_t col;
545 {
546 char *p;
547 int retcode;
548
549 struct partinfo dpart;
550 struct vnode *vp;
551 struct vattr va;
552 struct proc *proc;
553 int error;
554
555 retcode = 0;
556 p = rf_find_non_white(buf);
557 if (p[strlen(p) - 1] == '\n') {
558 /* strip off the newline */
559 p[strlen(p) - 1] = '\0';
560 }
561 (void) strcpy(diskPtr->devname, p);
562
563 proc = raidPtr->engine_thread;
564
565 /* Let's start by claiming the component is fine and well... */
566 diskPtr->status = rf_ds_optimal;
567
568 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
569 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
570
571 error = raidlookup(diskPtr->devname, proc, &vp);
572 if (error) {
573 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
574 if (error == ENXIO) {
575 /* the component isn't there... must be dead :-( */
576 diskPtr->status = rf_ds_failed;
577 } else {
578 return (error);
579 }
580 }
581 if (diskPtr->status == rf_ds_optimal) {
582
583 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
584 return (error);
585 }
586 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
587 FREAD, proc->p_ucred, proc);
588 if (error) {
589 return (error);
590 }
591
592 diskPtr->blockSize = dpart.disklab->d_secsize;
593
594 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
595 diskPtr->partitionSize = dpart.part->p_size;
596
597 raidPtr->raid_cinfo[row][col].ci_vp = vp;
598 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
599
600 /* This component was not automatically configured */
601 diskPtr->auto_configured = 0;
602 diskPtr->dev = va.va_rdev;
603
604 /* we allow the user to specify that only a fraction of the
605 * disks should be used this is just for debug: it speeds up
606 * the parity scan */
607 diskPtr->numBlocks = diskPtr->numBlocks *
608 rf_sizePercentage / 100;
609 }
610 return (0);
611 }
612
613 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
614 RF_ComponentLabel_t *);
615
616 static void
617 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
618 RF_Raid_t *raidPtr;
619 int row;
620 int column;
621 char *dev_name;
622 RF_ComponentLabel_t *ci_label;
623 {
624
625 printf("raid%d: Component %s being configured at row: %d col: %d\n",
626 raidPtr->raidid, dev_name, row, column );
627 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
628 ci_label->row, ci_label->column,
629 ci_label->num_rows, ci_label->num_columns);
630 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
631 ci_label->version, ci_label->serial_number,
632 ci_label->mod_counter);
633 printf(" Clean: %s Status: %d\n",
634 ci_label->clean ? "Yes" : "No", ci_label->status );
635 }
636
637 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
638 RF_ComponentLabel_t *, int, int );
639 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
640 serial_number, mod_counter )
641 RF_Raid_t *raidPtr;
642 int row;
643 int column;
644 char *dev_name;
645 RF_ComponentLabel_t *ci_label;
646 int serial_number;
647 int mod_counter;
648 {
649 int fatal_error = 0;
650
651 if (serial_number != ci_label->serial_number) {
652 printf("%s has a different serial number: %d %d\n",
653 dev_name, serial_number, ci_label->serial_number);
654 fatal_error = 1;
655 }
656 if (mod_counter != ci_label->mod_counter) {
657 printf("%s has a different modfication count: %d %d\n",
658 dev_name, mod_counter, ci_label->mod_counter);
659 }
660
661 if (row != ci_label->row) {
662 printf("Row out of alignment for: %s\n", dev_name);
663 fatal_error = 1;
664 }
665 if (column != ci_label->column) {
666 printf("Column out of alignment for: %s\n", dev_name);
667 fatal_error = 1;
668 }
669 if (raidPtr->numRow != ci_label->num_rows) {
670 printf("Number of rows do not match for: %s\n", dev_name);
671 fatal_error = 1;
672 }
673 if (raidPtr->numCol != ci_label->num_columns) {
674 printf("Number of columns do not match for: %s\n", dev_name);
675 fatal_error = 1;
676 }
677 if (ci_label->clean == 0) {
678 /* it's not clean, but that's not fatal */
679 printf("%s is not clean!\n", dev_name);
680 }
681 return(fatal_error);
682 }
683
684
685 /*
686
687 rf_CheckLabels() - check all the component labels for consistency.
688 Return an error if there is anything major amiss.
689
690 */
691
692 int
693 rf_CheckLabels( raidPtr, cfgPtr )
694 RF_Raid_t *raidPtr;
695 RF_Config_t *cfgPtr;
696 {
697 int r,c;
698 char *dev_name;
699 RF_ComponentLabel_t *ci_label;
700 int serial_number = 0;
701 int mod_number = 0;
702 int fatal_error = 0;
703 int mod_values[4];
704 int mod_count[4];
705 int ser_values[4];
706 int ser_count[4];
707 int num_ser;
708 int num_mod;
709 int i;
710 int found;
711 int hosed_row;
712 int hosed_column;
713 int too_fatal;
714 int parity_good;
715 int force;
716
717 hosed_row = -1;
718 hosed_column = -1;
719 too_fatal = 0;
720 force = cfgPtr->force;
721
722 /*
723 We're going to try to be a little intelligent here. If one
724 component's label is bogus, and we can identify that it's the
725 *only* one that's gone, we'll mark it as "failed" and allow
726 the configuration to proceed. This will be the *only* case
727 that we'll proceed if there would be (otherwise) fatal errors.
728
729 Basically we simply keep a count of how many components had
730 what serial number. If all but one agree, we simply mark
731 the disagreeing component as being failed, and allow
732 things to come up "normally".
733
734 We do this first for serial numbers, and then for "mod_counter".
735
736 */
737
738 num_ser = 0;
739 num_mod = 0;
740 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
741 for (c = 0; c < raidPtr->numCol; c++) {
742 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
743 found=0;
744 for(i=0;i<num_ser;i++) {
745 if (ser_values[i] == ci_label->serial_number) {
746 ser_count[i]++;
747 found=1;
748 break;
749 }
750 }
751 if (!found) {
752 ser_values[num_ser] = ci_label->serial_number;
753 ser_count[num_ser] = 1;
754 num_ser++;
755 if (num_ser>2) {
756 fatal_error = 1;
757 break;
758 }
759 }
760 found=0;
761 for(i=0;i<num_mod;i++) {
762 if (mod_values[i] == ci_label->mod_counter) {
763 mod_count[i]++;
764 found=1;
765 break;
766 }
767 }
768 if (!found) {
769 mod_values[num_mod] = ci_label->mod_counter;
770 mod_count[num_mod] = 1;
771 num_mod++;
772 if (num_mod>2) {
773 fatal_error = 1;
774 break;
775 }
776 }
777 }
778 }
779 #if DEBUG
780 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
781 for(i=0;i<num_ser;i++) {
782 printf("%d %d\n", ser_values[i], ser_count[i]);
783 }
784 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
785 for(i=0;i<num_mod;i++) {
786 printf("%d %d\n", mod_values[i], mod_count[i]);
787 }
788 #endif
789 serial_number = ser_values[0];
790 if (num_ser == 2) {
791 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
792 /* Locate the maverick component */
793 if (ser_count[1] > ser_count[0]) {
794 serial_number = ser_values[1];
795 }
796 for (r = 0; r < raidPtr->numRow; r++) {
797 for (c = 0; c < raidPtr->numCol; c++) {
798 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
799 if (serial_number !=
800 ci_label->serial_number) {
801 hosed_row = r;
802 hosed_column = c;
803 break;
804 }
805 }
806 }
807 printf("Hosed component: %s\n",
808 &cfgPtr->devnames[hosed_row][hosed_column][0]);
809 if (!force) {
810 /* we'll fail this component, as if there are
811 other major errors, we arn't forcing things
812 and we'll abort the config anyways */
813 raidPtr->Disks[hosed_row][hosed_column].status
814 = rf_ds_failed;
815 raidPtr->numFailures++;
816 raidPtr->status[hosed_row] = rf_rs_degraded;
817 }
818 } else {
819 too_fatal = 1;
820 }
821 if (cfgPtr->parityConfig == '0') {
822 /* We've identified two different serial numbers.
823 RAID 0 can't cope with that, so we'll punt */
824 too_fatal = 1;
825 }
826
827 }
828
829 /* record the serial number for later. If we bail later, setting
830 this doesn't matter, otherwise we've got the best guess at the
831 correct serial number */
832 raidPtr->serial_number = serial_number;
833
834 mod_number = mod_values[0];
835 if (num_mod == 2) {
836 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
837 /* Locate the maverick component */
838 if (mod_count[1] > mod_count[0]) {
839 mod_number = mod_values[1];
840 } else if (mod_count[1] < mod_count[0]) {
841 mod_number = mod_values[0];
842 } else {
843 /* counts of different modification values
844 are the same. Assume greater value is
845 the correct one, all other things
846 considered */
847 if (mod_values[0] > mod_values[1]) {
848 mod_number = mod_values[0];
849 } else {
850 mod_number = mod_values[1];
851 }
852
853 }
854 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
855 for (c = 0; c < raidPtr->numCol; c++) {
856 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
857 if (mod_number !=
858 ci_label->mod_counter) {
859 if ( ( hosed_row == r ) &&
860 ( hosed_column == c )) {
861 /* same one. Can
862 deal with it. */
863 } else {
864 hosed_row = r;
865 hosed_column = c;
866 if (num_ser != 1) {
867 too_fatal = 1;
868 break;
869 }
870 }
871 }
872 }
873 }
874 printf("Hosed component: %s\n",
875 &cfgPtr->devnames[hosed_row][hosed_column][0]);
876 if (!force) {
877 /* we'll fail this component, as if there are
878 other major errors, we arn't forcing things
879 and we'll abort the config anyways */
880 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
881 raidPtr->Disks[hosed_row][hosed_column].status
882 = rf_ds_failed;
883 raidPtr->numFailures++;
884 raidPtr->status[hosed_row] = rf_rs_degraded;
885 }
886 }
887 } else {
888 too_fatal = 1;
889 }
890 if (cfgPtr->parityConfig == '0') {
891 /* We've identified two different mod counters.
892 RAID 0 can't cope with that, so we'll punt */
893 too_fatal = 1;
894 }
895 }
896
897 raidPtr->mod_counter = mod_number;
898
899 if (too_fatal) {
900 /* we've had both a serial number mismatch, and a mod_counter
901 mismatch -- and they involved two different components!!
902 Bail -- make things fail so that the user must force
903 the issue... */
904 hosed_row = -1;
905 hosed_column = -1;
906 }
907
908 if (num_ser > 2) {
909 printf("raid%d: Too many different serial numbers!\n",
910 raidPtr->raidid);
911 }
912
913 if (num_mod > 2) {
914 printf("raid%d: Too many different mod counters!\n",
915 raidPtr->raidid);
916 }
917
918 /* we start by assuming the parity will be good, and flee from
919 that notion at the slightest sign of trouble */
920
921 parity_good = RF_RAID_CLEAN;
922 for (r = 0; r < raidPtr->numRow; r++) {
923 for (c = 0; c < raidPtr->numCol; c++) {
924 dev_name = &cfgPtr->devnames[r][c][0];
925 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
926
927 if ((r == hosed_row) && (c == hosed_column)) {
928 printf("raid%d: Ignoring %s\n",
929 raidPtr->raidid, dev_name);
930 } else {
931 rf_print_label_status( raidPtr, r, c,
932 dev_name, ci_label );
933 if (rf_check_label_vitals( raidPtr, r, c,
934 dev_name, ci_label,
935 serial_number,
936 mod_number )) {
937 fatal_error = 1;
938 }
939 if (ci_label->clean != RF_RAID_CLEAN) {
940 parity_good = RF_RAID_DIRTY;
941 }
942 }
943 }
944 }
945 if (fatal_error) {
946 parity_good = RF_RAID_DIRTY;
947 }
948
949 /* we note the state of the parity */
950 raidPtr->parity_good = parity_good;
951
952 return(fatal_error);
953 }
954
955 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
956 int
957 rf_add_hot_spare(raidPtr, sparePtr)
958 RF_Raid_t *raidPtr;
959 RF_SingleComponent_t *sparePtr;
960 {
961 RF_RaidDisk_t *disks;
962 RF_DiskQueue_t *spareQueues;
963 int ret;
964 unsigned int bs;
965 int spare_number;
966
967 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
968 printf("Num col: %d\n",raidPtr->numCol);
969 if (raidPtr->numSpare >= RF_MAXSPARE) {
970 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
971 return(EINVAL);
972 }
973
974 RF_LOCK_MUTEX(raidPtr->mutex);
975
976 /* the beginning of the spares... */
977 disks = &raidPtr->Disks[0][raidPtr->numCol];
978
979 spare_number = raidPtr->numSpare;
980
981 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
982 &disks[spare_number], 0,
983 raidPtr->numCol + spare_number);
984
985 if (ret)
986 goto fail;
987 if (disks[spare_number].status != rf_ds_optimal) {
988 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
989 sparePtr->component_name);
990 ret=EINVAL;
991 goto fail;
992 } else {
993 disks[spare_number].status = rf_ds_spare;
994 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
995 disks[spare_number].devname,
996 (long int) disks[spare_number].numBlocks,
997 disks[spare_number].blockSize,
998 (long int) disks[spare_number].numBlocks *
999 disks[spare_number].blockSize / 1024 / 1024);
1000 }
1001
1002
1003 /* check sizes and block sizes on the spare disk */
1004 bs = 1 << raidPtr->logBytesPerSector;
1005 if (disks[spare_number].blockSize != bs) {
1006 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1007 ret = EINVAL;
1008 goto fail;
1009 }
1010 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1011 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1012 disks[spare_number].devname,
1013 disks[spare_number].blockSize,
1014 (long int) raidPtr->sectorsPerDisk);
1015 ret = EINVAL;
1016 goto fail;
1017 } else {
1018 if (disks[spare_number].numBlocks >
1019 raidPtr->sectorsPerDisk) {
1020 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1021 (long int) raidPtr->sectorsPerDisk);
1022
1023 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1024 }
1025 }
1026
1027 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1028 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1029 0, raidPtr->numCol + spare_number,
1030 raidPtr->Queues[0][0].qPtr, /* XXX */
1031 raidPtr->sectorsPerDisk,
1032 raidPtr->Disks[0][raidPtr->numCol + spare_number].dev,
1033 raidPtr->maxOutstanding,
1034 &raidPtr->shutdownList,
1035 raidPtr->cleanupList);
1036
1037
1038 raidPtr->numSpare++;
1039 RF_UNLOCK_MUTEX(raidPtr->mutex);
1040 return (0);
1041
1042 fail:
1043 RF_UNLOCK_MUTEX(raidPtr->mutex);
1044 return(ret);
1045 }
1046
1047 int
1048 rf_remove_hot_spare(raidPtr,sparePtr)
1049 RF_Raid_t *raidPtr;
1050 RF_SingleComponent_t *sparePtr;
1051 {
1052 int spare_number;
1053
1054
1055 if (raidPtr->numSpare==0) {
1056 printf("No spares to remove!\n");
1057 return(EINVAL);
1058 }
1059
1060 spare_number = sparePtr->column;
1061
1062 return(EINVAL); /* XXX not implemented yet */
1063 #if 0
1064 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1065 return(EINVAL);
1066 }
1067
1068 /* verify that this spare isn't in use... */
1069
1070
1071
1072
1073 /* it's gone.. */
1074
1075 raidPtr->numSpare--;
1076
1077 return(0);
1078 #endif
1079 }
1080
1081
1082