rf_disks.c revision 1.20 1 /* $NetBSD: rf_disks.c,v 1.20 2000/02/25 19:56:32 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each macro is wrapped in do { } while (0) so that it expands
 * to exactly one statement; a bare "if (...) printf(...)" would capture an
 * `else' that follows the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94
95 /**************************************************************************
96 *
97 * initialize the disks comprising the array
98 *
99 * We want the spare disks to have regular row,col numbers so that we can
100 * easily substitute a spare for a failed disk. But, the driver code assumes
101 * throughout that the array contains numRow by numCol _non-spare_ disks, so
102 * it's not clear how to fit in the spares. This is an unfortunate holdover
103 * from raidSim. The quick and dirty fix is to make row zero bigger than the
104 * rest, and put all the spares in it. This probably needs to get changed
105 * eventually.
106 *
107 **************************************************************************/
108
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 RF_ShutdownList_t **listp;
112 RF_Raid_t *raidPtr;
113 RF_Config_t *cfgPtr;
114 {
115 RF_RaidDisk_t **disks;
116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 RF_RowCol_t r, c;
118 int bs, ret;
119 unsigned i, count, foundone = 0, numFailuresThisRow;
120 int force;
121
122 force = cfgPtr->force;
123
124 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
125 (RF_RaidDisk_t **), raidPtr->cleanupList);
126 if (disks == NULL) {
127 ret = ENOMEM;
128 goto fail;
129 }
130 raidPtr->Disks = disks;
131
132 /* get space for the device-specific stuff... */
133 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
134 sizeof(struct raidcinfo *), (struct raidcinfo **),
135 raidPtr->cleanupList);
136 if (raidPtr->raid_cinfo == NULL) {
137 ret = ENOMEM;
138 goto fail;
139 }
140 for (r = 0; r < raidPtr->numRow; r++) {
141 numFailuresThisRow = 0;
142 /* We allocate RF_MAXSPARE on the first row so that we
143 have room to do hot-swapping of spares */
144 RF_CallocAndAdd(disks[r], raidPtr->numCol
145 + ((r == 0) ? RF_MAXSPARE : 0),
146 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
147 raidPtr->cleanupList);
148 if (disks[r] == NULL) {
149 ret = ENOMEM;
150 goto fail;
151 }
152 /* get more space for device specific stuff.. */
153 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
154 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
155 sizeof(struct raidcinfo), (struct raidcinfo *),
156 raidPtr->cleanupList);
157 if (raidPtr->raid_cinfo[r] == NULL) {
158 ret = ENOMEM;
159 goto fail;
160 }
161 for (c = 0; c < raidPtr->numCol; c++) {
162 ret = rf_ConfigureDisk(raidPtr,
163 &cfgPtr->devnames[r][c][0],
164 &disks[r][c], r, c);
165
166 if (ret)
167 goto fail;
168
169 if (disks[r][c].status == rf_ds_optimal) {
170 raidread_component_label(
171 raidPtr->raid_cinfo[r][c].ci_dev,
172 raidPtr->raid_cinfo[r][c].ci_vp,
173 &raidPtr->raid_cinfo[r][c].ci_label);
174 }
175
176 if (disks[r][c].status != rf_ds_optimal) {
177 numFailuresThisRow++;
178 } else {
179 if (disks[r][c].numBlocks < min_numblks)
180 min_numblks = disks[r][c].numBlocks;
181 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
182 r, c, disks[r][c].devname,
183 (long int) disks[r][c].numBlocks,
184 disks[r][c].blockSize,
185 (long int) disks[r][c].numBlocks *
186 disks[r][c].blockSize / 1024 / 1024);
187 }
188 }
189 /* XXX fix for n-fault tolerant */
190 /* XXX this should probably check to see how many failures
191 we can handle for this configuration! */
192 if (numFailuresThisRow > 0)
193 raidPtr->status[r] = rf_rs_degraded;
194 }
195
196 /* all disks must be the same size & have the same block size, bs must
197 * be a power of 2 */
198 bs = 0;
199 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
200 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
201 if (disks[r][c].status == rf_ds_optimal) {
202 bs = disks[r][c].blockSize;
203 foundone = 1;
204 }
205 }
206 }
207 if (!foundone) {
208 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
209 ret = EINVAL;
210 goto fail;
211 }
212 for (count = 0, i = 1; i; i <<= 1)
213 if (bs & i)
214 count++;
215 if (count != 1) {
216 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
217 ret = EINVAL;
218 goto fail;
219 }
220
221 if (rf_CheckLabels( raidPtr, cfgPtr )) {
222 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
223 if (force != 0) {
224 printf("raid%d: Fatal errors being ignored.\n",
225 raidPtr->raidid);
226 } else {
227 ret = EINVAL;
228 goto fail;
229 }
230 }
231
232 for (r = 0; r < raidPtr->numRow; r++) {
233 for (c = 0; c < raidPtr->numCol; c++) {
234 if (disks[r][c].status == rf_ds_optimal) {
235 if (disks[r][c].blockSize != bs) {
236 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
237 ret = EINVAL;
238 goto fail;
239 }
240 if (disks[r][c].numBlocks != min_numblks) {
241 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
242 r, c, (int) min_numblks);
243 disks[r][c].numBlocks = min_numblks;
244 }
245 }
246 }
247 }
248
249 raidPtr->sectorsPerDisk = min_numblks;
250 raidPtr->logBytesPerSector = ffs(bs) - 1;
251 raidPtr->bytesPerSector = bs;
252 raidPtr->sectorMask = bs - 1;
253 return (0);
254
255 fail:
256
257 rf_UnconfigureVnodes( raidPtr );
258
259 return (ret);
260 }
261
262
263 /****************************************************************************
264 * set up the data structures describing the spare disks in the array
265 * recall from the above comment that the spare disk descriptors are stored
266 * in row zero, which is specially expanded to hold them.
267 ****************************************************************************/
268 int
269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
270 RF_ShutdownList_t ** listp;
271 RF_Raid_t * raidPtr;
272 RF_Config_t * cfgPtr;
273 {
274 int i, ret;
275 unsigned int bs;
276 RF_RaidDisk_t *disks;
277 int num_spares_done;
278
279 num_spares_done = 0;
280
281 /* The space for the spares should have already been allocated by
282 * ConfigureDisks() */
283
284 disks = &raidPtr->Disks[0][raidPtr->numCol];
285 for (i = 0; i < raidPtr->numSpare; i++) {
286 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
287 &disks[i], 0, raidPtr->numCol + i);
288 if (ret)
289 goto fail;
290 if (disks[i].status != rf_ds_optimal) {
291 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
292 &cfgPtr->spare_names[i][0]);
293 } else {
294 disks[i].status = rf_ds_spare; /* change status to
295 * spare */
296 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
297 disks[i].devname,
298 (long int) disks[i].numBlocks, disks[i].blockSize,
299 (long int) disks[i].numBlocks *
300 disks[i].blockSize / 1024 / 1024);
301 }
302 num_spares_done++;
303 }
304
305 /* check sizes and block sizes on spare disks */
306 bs = 1 << raidPtr->logBytesPerSector;
307 for (i = 0; i < raidPtr->numSpare; i++) {
308 if (disks[i].blockSize != bs) {
309 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
310 ret = EINVAL;
311 goto fail;
312 }
313 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
314 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
315 disks[i].devname, disks[i].blockSize,
316 (long int) raidPtr->sectorsPerDisk);
317 ret = EINVAL;
318 goto fail;
319 } else
320 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
321 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
322
323 disks[i].numBlocks = raidPtr->sectorsPerDisk;
324 }
325 }
326
327 return (0);
328
329 fail:
330
331 /* Release the hold on the main components. We've failed to allocate
332 * a spare, and since we're failing, we need to free things..
333
334 XXX failing to allocate a spare is *not* that big of a deal...
335 We *can* survive without it, if need be, esp. if we get hot
336 adding working.
337
338 If we don't fail out here, then we need a way to remove this spare...
339 that should be easier to do here than if we are "live"...
340
341 */
342
343 rf_UnconfigureVnodes( raidPtr );
344
345 return (ret);
346 }
347
348 static int
349 rf_AllocDiskStructures(raidPtr, cfgPtr)
350 RF_Raid_t *raidPtr;
351 RF_Config_t *cfgPtr;
352 {
353 RF_RaidDisk_t **disks;
354 int ret;
355 int r;
356
357 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
358 (RF_RaidDisk_t **), raidPtr->cleanupList);
359 if (disks == NULL) {
360 ret = ENOMEM;
361 goto fail;
362 }
363 raidPtr->Disks = disks;
364 /* get space for the device-specific stuff... */
365 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
366 sizeof(struct raidcinfo *), (struct raidcinfo **),
367 raidPtr->cleanupList);
368 if (raidPtr->raid_cinfo == NULL) {
369 ret = ENOMEM;
370 goto fail;
371 }
372
373 for (r = 0; r < raidPtr->numRow; r++) {
374 /* We allocate RF_MAXSPARE on the first row so that we
375 have room to do hot-swapping of spares */
376 RF_CallocAndAdd(disks[r], raidPtr->numCol
377 + ((r == 0) ? RF_MAXSPARE : 0),
378 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
379 raidPtr->cleanupList);
380 if (disks[r] == NULL) {
381 ret = ENOMEM;
382 goto fail;
383 }
384 /* get more space for device specific stuff.. */
385 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
386 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
387 sizeof(struct raidcinfo), (struct raidcinfo *),
388 raidPtr->cleanupList);
389 if (raidPtr->raid_cinfo[r] == NULL) {
390 ret = ENOMEM;
391 goto fail;
392 }
393 }
394 return(0);
395 fail:
396 rf_UnconfigureVnodes( raidPtr );
397
398 return(ret);
399 }
400
401
402 /* configure a single disk during auto-configuration at boot */
403 int
404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
405 RF_Raid_t *raidPtr;
406 RF_Config_t *cfgPtr;
407 RF_AutoConfig_t *auto_config;
408 {
409 RF_RaidDisk_t **disks;
410 RF_RaidDisk_t *diskPtr;
411 RF_RowCol_t r, c;
412 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
413 int bs, ret;
414 int numFailuresThisRow;
415 int force;
416 RF_AutoConfig_t *ac;
417 int parity_good;
418 int mod_counter;
419
420 #if DEBUG
421 printf("Starting autoconfiguration of RAID set...\n");
422 #endif
423 force = cfgPtr->force;
424
425 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
426 if (ret)
427 goto fail;
428
429 disks = raidPtr->Disks;
430
431 /* assume the parity will be fine.. */
432 parity_good = RF_RAID_CLEAN;
433
434 /* Check for mod_counters that are too low */
435 mod_counter = -1;
436 ac = auto_config;
437 while(ac!=NULL) {
438 if (ac->clabel->mod_counter > mod_counter) {
439 mod_counter = ac->clabel->mod_counter;
440 }
441 ac = ac->next;
442 }
443 if (mod_counter == -1) {
444 /* mod_counters were all negative!?!?!?
445 Ok, we can deal with that. */
446 #if 0
447 ac = auto_config;
448 while(ac!=NULL) {
449 if (ac->clabel->mod_counter > mod_counter) {
450 mod_counter = ac->clabel->mod_counter;
451 }
452 ac = ac->next;
453 }
454 #endif
455 }
456
457 for (r = 0; r < raidPtr->numRow; r++) {
458 numFailuresThisRow = 0;
459 for (c = 0; c < raidPtr->numCol; c++) {
460 diskPtr = &disks[r][c];
461
462 /* find this row/col in the autoconfig */
463 #if DEBUG
464 printf("Looking for %d,%d in autoconfig\n",r,c);
465 #endif
466 ac = auto_config;
467 while(ac!=NULL) {
468 if (ac->clabel==NULL) {
469 /* big-time bad news. */
470 goto fail;
471 }
472 if ((ac->clabel->row == r) &&
473 (ac->clabel->column == c)) {
474 /* it's this one... */
475 #if DEBUG
476 printf("Found: %s at %d,%d\n",
477 ac->devname,r,c);
478 #endif
479
480 break;
481 }
482 ac=ac->next;
483 }
484
485 if (ac!=NULL) {
486 /* Found it. Configure it.. */
487 diskPtr->blockSize = ac->clabel->blockSize;
488 diskPtr->numBlocks = ac->clabel->numBlocks;
489 /* Note: rf_protectedSectors is already
490 factored into numBlocks here */
491 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
492 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
493
494 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
495 ac->clabel, sizeof(*ac->clabel));
496 sprintf(diskPtr->devname, "/dev/%s",
497 ac->devname);
498
499 /* note the fact that this component was
500 autoconfigured. You'll need this info
501 later. Trust me :) */
502 diskPtr->auto_configured = 1;
503 diskPtr->dev = ac->dev;
504
505 /*
506 * we allow the user to specify that
507 * only a fraction of the disks should
508 * be used this is just for debug: it
509 * speeds up the parity scan
510 */
511
512 diskPtr->numBlocks = diskPtr->numBlocks *
513 rf_sizePercentage / 100;
514
515 /* XXX these will get set multiple times,
516 but since we're autoconfiguring, they'd
517 better be always the same each time!
518 If not, this is the least of your worries */
519
520 bs = diskPtr->blockSize;
521 min_numblks = diskPtr->numBlocks;
522
523 /* this gets done multiple times, but that's
524 fine -- the serial number will be the same
525 for all components, guaranteed */
526 raidPtr->serial_number =
527 ac->clabel->serial_number;
528 /* check the last time the label
529 was modified */
530 if (ac->clabel->mod_counter !=
531 mod_counter) {
532 /* Even though we've filled in all
533 of the above, we don't trust
534 this component since it's
535 modification counter is not
536 in sync with the rest, and we really
537 consider it to be failed. */
538 disks[r][c].status = rf_ds_failed;
539 numFailuresThisRow++;
540 } else {
541 if (ac->clabel->clean !=
542 RF_RAID_CLEAN) {
543 parity_good = RF_RAID_DIRTY;
544 }
545 }
546 } else {
547 /* Didn't find it at all!!
548 Component must really be dead */
549 disks[r][c].status = rf_ds_failed;
550 numFailuresThisRow++;
551 }
552 }
553 /* XXX fix for n-fault tolerant */
554 /* XXX this should probably check to see how many failures
555 we can handle for this configuration! */
556 if (numFailuresThisRow > 0)
557 raidPtr->status[r] = rf_rs_degraded;
558 }
559
560 raidPtr->mod_counter = mod_counter;
561
562 /* note the state of the parity, if any */
563 raidPtr->parity_good = parity_good;
564 raidPtr->sectorsPerDisk = min_numblks;
565 raidPtr->logBytesPerSector = ffs(bs) - 1;
566 raidPtr->bytesPerSector = bs;
567 raidPtr->sectorMask = bs - 1;
568 return (0);
569
570 fail:
571
572 rf_UnconfigureVnodes( raidPtr );
573
574 return (ret);
575
576 }
577
578 /* configure a single disk in the array */
579 int
580 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
581 RF_Raid_t *raidPtr;
582 char *buf;
583 RF_RaidDisk_t *diskPtr;
584 RF_RowCol_t row;
585 RF_RowCol_t col;
586 {
587 char *p;
588 int retcode;
589
590 struct partinfo dpart;
591 struct vnode *vp;
592 struct vattr va;
593 struct proc *proc;
594 int error;
595
596 retcode = 0;
597 p = rf_find_non_white(buf);
598 if (p[strlen(p) - 1] == '\n') {
599 /* strip off the newline */
600 p[strlen(p) - 1] = '\0';
601 }
602 (void) strcpy(diskPtr->devname, p);
603
604 proc = raidPtr->engine_thread;
605
606 /* Let's start by claiming the component is fine and well... */
607 diskPtr->status = rf_ds_optimal;
608
609 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
610 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
611
612 error = raidlookup(diskPtr->devname, proc, &vp);
613 if (error) {
614 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
615 if (error == ENXIO) {
616 /* the component isn't there... must be dead :-( */
617 diskPtr->status = rf_ds_failed;
618 } else {
619 return (error);
620 }
621 }
622 if (diskPtr->status == rf_ds_optimal) {
623
624 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
625 return (error);
626 }
627 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
628 FREAD, proc->p_ucred, proc);
629 if (error) {
630 return (error);
631 }
632
633 diskPtr->blockSize = dpart.disklab->d_secsize;
634
635 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
636 diskPtr->partitionSize = dpart.part->p_size;
637
638 raidPtr->raid_cinfo[row][col].ci_vp = vp;
639 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
640
641 /* This component was not automatically configured */
642 diskPtr->auto_configured = 0;
643 diskPtr->dev = va.va_rdev;
644
645 /* we allow the user to specify that only a fraction of the
646 * disks should be used this is just for debug: it speeds up
647 * the parity scan */
648 diskPtr->numBlocks = diskPtr->numBlocks *
649 rf_sizePercentage / 100;
650 }
651 return (0);
652 }
653
654 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
655 RF_ComponentLabel_t *);
656
657 static void
658 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
659 RF_Raid_t *raidPtr;
660 int row;
661 int column;
662 char *dev_name;
663 RF_ComponentLabel_t *ci_label;
664 {
665
666 printf("raid%d: Component %s being configured at row: %d col: %d\n",
667 raidPtr->raidid, dev_name, row, column );
668 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
669 ci_label->row, ci_label->column,
670 ci_label->num_rows, ci_label->num_columns);
671 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
672 ci_label->version, ci_label->serial_number,
673 ci_label->mod_counter);
674 printf(" Clean: %s Status: %d\n",
675 ci_label->clean ? "Yes" : "No", ci_label->status );
676 }
677
678 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
679 RF_ComponentLabel_t *, int, int );
680 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
681 serial_number, mod_counter )
682 RF_Raid_t *raidPtr;
683 int row;
684 int column;
685 char *dev_name;
686 RF_ComponentLabel_t *ci_label;
687 int serial_number;
688 int mod_counter;
689 {
690 int fatal_error = 0;
691
692 if (serial_number != ci_label->serial_number) {
693 printf("%s has a different serial number: %d %d\n",
694 dev_name, serial_number, ci_label->serial_number);
695 fatal_error = 1;
696 }
697 if (mod_counter != ci_label->mod_counter) {
698 printf("%s has a different modfication count: %d %d\n",
699 dev_name, mod_counter, ci_label->mod_counter);
700 }
701
702 if (row != ci_label->row) {
703 printf("Row out of alignment for: %s\n", dev_name);
704 fatal_error = 1;
705 }
706 if (column != ci_label->column) {
707 printf("Column out of alignment for: %s\n", dev_name);
708 fatal_error = 1;
709 }
710 if (raidPtr->numRow != ci_label->num_rows) {
711 printf("Number of rows do not match for: %s\n", dev_name);
712 fatal_error = 1;
713 }
714 if (raidPtr->numCol != ci_label->num_columns) {
715 printf("Number of columns do not match for: %s\n", dev_name);
716 fatal_error = 1;
717 }
718 if (ci_label->clean == 0) {
719 /* it's not clean, but that's not fatal */
720 printf("%s is not clean!\n", dev_name);
721 }
722 return(fatal_error);
723 }
724
725
726 /*
727
728 rf_CheckLabels() - check all the component labels for consistency.
729 Return an error if there is anything major amiss.
730
731 */
732
733 int
734 rf_CheckLabels( raidPtr, cfgPtr )
735 RF_Raid_t *raidPtr;
736 RF_Config_t *cfgPtr;
737 {
738 int r,c;
739 char *dev_name;
740 RF_ComponentLabel_t *ci_label;
741 int serial_number = 0;
742 int mod_number = 0;
743 int fatal_error = 0;
744 int mod_values[4];
745 int mod_count[4];
746 int ser_values[4];
747 int ser_count[4];
748 int num_ser;
749 int num_mod;
750 int i;
751 int found;
752 int hosed_row;
753 int hosed_column;
754 int too_fatal;
755 int parity_good;
756 int force;
757
758 hosed_row = -1;
759 hosed_column = -1;
760 too_fatal = 0;
761 force = cfgPtr->force;
762
763 /*
764 We're going to try to be a little intelligent here. If one
765 component's label is bogus, and we can identify that it's the
766 *only* one that's gone, we'll mark it as "failed" and allow
767 the configuration to proceed. This will be the *only* case
768 that we'll proceed if there would be (otherwise) fatal errors.
769
770 Basically we simply keep a count of how many components had
771 what serial number. If all but one agree, we simply mark
772 the disagreeing component as being failed, and allow
773 things to come up "normally".
774
775 We do this first for serial numbers, and then for "mod_counter".
776
777 */
778
779 num_ser = 0;
780 num_mod = 0;
781 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
782 for (c = 0; c < raidPtr->numCol; c++) {
783 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
784 found=0;
785 for(i=0;i<num_ser;i++) {
786 if (ser_values[i] == ci_label->serial_number) {
787 ser_count[i]++;
788 found=1;
789 break;
790 }
791 }
792 if (!found) {
793 ser_values[num_ser] = ci_label->serial_number;
794 ser_count[num_ser] = 1;
795 num_ser++;
796 if (num_ser>2) {
797 fatal_error = 1;
798 break;
799 }
800 }
801 found=0;
802 for(i=0;i<num_mod;i++) {
803 if (mod_values[i] == ci_label->mod_counter) {
804 mod_count[i]++;
805 found=1;
806 break;
807 }
808 }
809 if (!found) {
810 mod_values[num_mod] = ci_label->mod_counter;
811 mod_count[num_mod] = 1;
812 num_mod++;
813 if (num_mod>2) {
814 fatal_error = 1;
815 break;
816 }
817 }
818 }
819 }
820 #if DEBUG
821 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
822 for(i=0;i<num_ser;i++) {
823 printf("%d %d\n", ser_values[i], ser_count[i]);
824 }
825 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
826 for(i=0;i<num_mod;i++) {
827 printf("%d %d\n", mod_values[i], mod_count[i]);
828 }
829 #endif
830 serial_number = ser_values[0];
831 if (num_ser == 2) {
832 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
833 /* Locate the maverick component */
834 if (ser_count[1] > ser_count[0]) {
835 serial_number = ser_values[1];
836 }
837 for (r = 0; r < raidPtr->numRow; r++) {
838 for (c = 0; c < raidPtr->numCol; c++) {
839 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
840 if (serial_number !=
841 ci_label->serial_number) {
842 hosed_row = r;
843 hosed_column = c;
844 break;
845 }
846 }
847 }
848 printf("Hosed component: %s\n",
849 &cfgPtr->devnames[hosed_row][hosed_column][0]);
850 if (!force) {
851 /* we'll fail this component, as if there are
852 other major errors, we arn't forcing things
853 and we'll abort the config anyways */
854 raidPtr->Disks[hosed_row][hosed_column].status
855 = rf_ds_failed;
856 raidPtr->numFailures++;
857 raidPtr->status[hosed_row] = rf_rs_degraded;
858 }
859 } else {
860 too_fatal = 1;
861 }
862 if (cfgPtr->parityConfig == '0') {
863 /* We've identified two different serial numbers.
864 RAID 0 can't cope with that, so we'll punt */
865 too_fatal = 1;
866 }
867
868 }
869
870 /* record the serial number for later. If we bail later, setting
871 this doesn't matter, otherwise we've got the best guess at the
872 correct serial number */
873 raidPtr->serial_number = serial_number;
874
875 mod_number = mod_values[0];
876 if (num_mod == 2) {
877 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
878 /* Locate the maverick component */
879 if (mod_count[1] > mod_count[0]) {
880 mod_number = mod_values[1];
881 } else if (mod_count[1] < mod_count[0]) {
882 mod_number = mod_values[0];
883 } else {
884 /* counts of different modification values
885 are the same. Assume greater value is
886 the correct one, all other things
887 considered */
888 if (mod_values[0] > mod_values[1]) {
889 mod_number = mod_values[0];
890 } else {
891 mod_number = mod_values[1];
892 }
893
894 }
895 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
896 for (c = 0; c < raidPtr->numCol; c++) {
897 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
898 if (mod_number !=
899 ci_label->mod_counter) {
900 if ( ( hosed_row == r ) &&
901 ( hosed_column == c )) {
902 /* same one. Can
903 deal with it. */
904 } else {
905 hosed_row = r;
906 hosed_column = c;
907 if (num_ser != 1) {
908 too_fatal = 1;
909 break;
910 }
911 }
912 }
913 }
914 }
915 printf("Hosed component: %s\n",
916 &cfgPtr->devnames[hosed_row][hosed_column][0]);
917 if (!force) {
918 /* we'll fail this component, as if there are
919 other major errors, we arn't forcing things
920 and we'll abort the config anyways */
921 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
922 raidPtr->Disks[hosed_row][hosed_column].status
923 = rf_ds_failed;
924 raidPtr->numFailures++;
925 raidPtr->status[hosed_row] = rf_rs_degraded;
926 }
927 }
928 } else {
929 too_fatal = 1;
930 }
931 if (cfgPtr->parityConfig == '0') {
932 /* We've identified two different mod counters.
933 RAID 0 can't cope with that, so we'll punt */
934 too_fatal = 1;
935 }
936 }
937
938 raidPtr->mod_counter = mod_number;
939
940 if (too_fatal) {
941 /* we've had both a serial number mismatch, and a mod_counter
942 mismatch -- and they involved two different components!!
943 Bail -- make things fail so that the user must force
944 the issue... */
945 hosed_row = -1;
946 hosed_column = -1;
947 }
948
949 if (num_ser > 2) {
950 printf("raid%d: Too many different serial numbers!\n",
951 raidPtr->raidid);
952 }
953
954 if (num_mod > 2) {
955 printf("raid%d: Too many different mod counters!\n",
956 raidPtr->raidid);
957 }
958
959 /* we start by assuming the parity will be good, and flee from
960 that notion at the slightest sign of trouble */
961
962 parity_good = RF_RAID_CLEAN;
963 for (r = 0; r < raidPtr->numRow; r++) {
964 for (c = 0; c < raidPtr->numCol; c++) {
965 dev_name = &cfgPtr->devnames[r][c][0];
966 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
967
968 if ((r == hosed_row) && (c == hosed_column)) {
969 printf("raid%d: Ignoring %s\n",
970 raidPtr->raidid, dev_name);
971 } else {
972 rf_print_label_status( raidPtr, r, c,
973 dev_name, ci_label );
974 if (rf_check_label_vitals( raidPtr, r, c,
975 dev_name, ci_label,
976 serial_number,
977 mod_number )) {
978 fatal_error = 1;
979 }
980 if (ci_label->clean != RF_RAID_CLEAN) {
981 parity_good = RF_RAID_DIRTY;
982 }
983 }
984 }
985 }
986 if (fatal_error) {
987 parity_good = RF_RAID_DIRTY;
988 }
989
990 /* we note the state of the parity */
991 raidPtr->parity_good = parity_good;
992
993 return(fatal_error);
994 }
995
996 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
997 int
998 rf_add_hot_spare(raidPtr, sparePtr)
999 RF_Raid_t *raidPtr;
1000 RF_SingleComponent_t *sparePtr;
1001 {
1002 RF_RaidDisk_t *disks;
1003 RF_DiskQueue_t *spareQueues;
1004 int ret;
1005 unsigned int bs;
1006 int spare_number;
1007
1008 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
1009 printf("Num col: %d\n",raidPtr->numCol);
1010 if (raidPtr->numSpare >= RF_MAXSPARE) {
1011 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1012 return(EINVAL);
1013 }
1014
1015 RF_LOCK_MUTEX(raidPtr->mutex);
1016
1017 /* the beginning of the spares... */
1018 disks = &raidPtr->Disks[0][raidPtr->numCol];
1019
1020 spare_number = raidPtr->numSpare;
1021
1022 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1023 &disks[spare_number], 0,
1024 raidPtr->numCol + spare_number);
1025
1026 if (ret)
1027 goto fail;
1028 if (disks[spare_number].status != rf_ds_optimal) {
1029 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1030 sparePtr->component_name);
1031 ret=EINVAL;
1032 goto fail;
1033 } else {
1034 disks[spare_number].status = rf_ds_spare;
1035 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1036 disks[spare_number].devname,
1037 (long int) disks[spare_number].numBlocks,
1038 disks[spare_number].blockSize,
1039 (long int) disks[spare_number].numBlocks *
1040 disks[spare_number].blockSize / 1024 / 1024);
1041 }
1042
1043
1044 /* check sizes and block sizes on the spare disk */
1045 bs = 1 << raidPtr->logBytesPerSector;
1046 if (disks[spare_number].blockSize != bs) {
1047 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1048 ret = EINVAL;
1049 goto fail;
1050 }
1051 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1052 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1053 disks[spare_number].devname,
1054 disks[spare_number].blockSize,
1055 (long int) raidPtr->sectorsPerDisk);
1056 ret = EINVAL;
1057 goto fail;
1058 } else {
1059 if (disks[spare_number].numBlocks >
1060 raidPtr->sectorsPerDisk) {
1061 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1062 (long int) raidPtr->sectorsPerDisk);
1063
1064 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1065 }
1066 }
1067
1068 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1069 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1070 0, raidPtr->numCol + spare_number,
1071 raidPtr->qType,
1072 raidPtr->sectorsPerDisk,
1073 raidPtr->Disks[0][raidPtr->numCol +
1074 spare_number].dev,
1075 raidPtr->maxOutstanding,
1076 &raidPtr->shutdownList,
1077 raidPtr->cleanupList);
1078
1079
1080 raidPtr->numSpare++;
1081 RF_UNLOCK_MUTEX(raidPtr->mutex);
1082 return (0);
1083
1084 fail:
1085 RF_UNLOCK_MUTEX(raidPtr->mutex);
1086 return(ret);
1087 }
1088
1089 int
1090 rf_remove_hot_spare(raidPtr,sparePtr)
1091 RF_Raid_t *raidPtr;
1092 RF_SingleComponent_t *sparePtr;
1093 {
1094 int spare_number;
1095
1096
1097 if (raidPtr->numSpare==0) {
1098 printf("No spares to remove!\n");
1099 return(EINVAL);
1100 }
1101
1102 spare_number = sparePtr->column;
1103
1104 return(EINVAL); /* XXX not implemented yet */
1105 #if 0
1106 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1107 return(EINVAL);
1108 }
1109
1110 /* verify that this spare isn't in use... */
1111
1112
1113
1114
1115 /* it's gone.. */
1116
1117 raidPtr->numSpare--;
1118
1119 return(0);
1120 #endif
1121 }
1122
1123
1124