rf_disks.c revision 1.34.6.1 1 /* $NetBSD: rf_disks.c,v 1.34.6.1 2001/09/07 04:45:28 thorpej Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
89 RF_ComponentLabel_t *);
90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *, int, int );
92
/*
 * Debug printf wrappers: the message is printed only when rf_diskDebug is
 * non-zero.  Each expansion is wrapped in do { } while (0) so that it is a
 * single statement; an `else' that follows the macro in the caller can
 * therefore never bind to the macro's internal `if'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
95
96 /**************************************************************************
97 *
98 * initialize the disks comprising the array
99 *
100 * We want the spare disks to have regular row,col numbers so that we can
101 * easily substitue a spare for a failed disk. But, the driver code assumes
102 * throughout that the array contains numRow by numCol _non-spare_ disks, so
103 * it's not clear how to fit in the spares. This is an unfortunate holdover
104 * from raidSim. The quick and dirty fix is to make row zero bigger than the
105 * rest, and put all the spares in it. This probably needs to get changed
106 * eventually.
107 *
108 **************************************************************************/
109
110 int
111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
112 RF_ShutdownList_t **listp;
113 RF_Raid_t *raidPtr;
114 RF_Config_t *cfgPtr;
115 {
116 RF_RaidDisk_t **disks;
117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 RF_RowCol_t r, c;
119 int bs, ret;
120 unsigned i, count, foundone = 0, numFailuresThisRow;
121 int force;
122
123 force = cfgPtr->force;
124
125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
126 if (ret)
127 goto fail;
128
129 disks = raidPtr->Disks;
130
131 for (r = 0; r < raidPtr->numRow; r++) {
132 numFailuresThisRow = 0;
133 for (c = 0; c < raidPtr->numCol; c++) {
134 ret = rf_ConfigureDisk(raidPtr,
135 &cfgPtr->devnames[r][c][0],
136 &disks[r][c], r, c);
137
138 if (ret)
139 goto fail;
140
141 if (disks[r][c].status == rf_ds_optimal) {
142 raidread_component_label(
143 raidPtr->raid_cinfo[r][c].ci_vp,
144 &raidPtr->raid_cinfo[r][c].ci_label);
145 }
146
147 if (disks[r][c].status != rf_ds_optimal) {
148 numFailuresThisRow++;
149 } else {
150 if (disks[r][c].numBlocks < min_numblks)
151 min_numblks = disks[r][c].numBlocks;
152 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
153 r, c, disks[r][c].devname,
154 (long int) disks[r][c].numBlocks,
155 disks[r][c].blockSize,
156 (long int) disks[r][c].numBlocks *
157 disks[r][c].blockSize / 1024 / 1024);
158 }
159 }
160 /* XXX fix for n-fault tolerant */
161 /* XXX this should probably check to see how many failures
162 we can handle for this configuration! */
163 if (numFailuresThisRow > 0)
164 raidPtr->status[r] = rf_rs_degraded;
165 }
166
167 /* all disks must be the same size & have the same block size, bs must
168 * be a power of 2 */
169 bs = 0;
170 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
171 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
172 if (disks[r][c].status == rf_ds_optimal) {
173 bs = disks[r][c].blockSize;
174 foundone = 1;
175 }
176 }
177 }
178 if (!foundone) {
179 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
180 ret = EINVAL;
181 goto fail;
182 }
183 for (count = 0, i = 1; i; i <<= 1)
184 if (bs & i)
185 count++;
186 if (count != 1) {
187 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
188 ret = EINVAL;
189 goto fail;
190 }
191
192 if (rf_CheckLabels( raidPtr, cfgPtr )) {
193 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
194 if (force != 0) {
195 printf("raid%d: Fatal errors being ignored.\n",
196 raidPtr->raidid);
197 } else {
198 ret = EINVAL;
199 goto fail;
200 }
201 }
202
203 for (r = 0; r < raidPtr->numRow; r++) {
204 for (c = 0; c < raidPtr->numCol; c++) {
205 if (disks[r][c].status == rf_ds_optimal) {
206 if (disks[r][c].blockSize != bs) {
207 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
208 ret = EINVAL;
209 goto fail;
210 }
211 if (disks[r][c].numBlocks != min_numblks) {
212 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
213 r, c, (int) min_numblks);
214 disks[r][c].numBlocks = min_numblks;
215 }
216 }
217 }
218 }
219
220 raidPtr->sectorsPerDisk = min_numblks;
221 raidPtr->logBytesPerSector = ffs(bs) - 1;
222 raidPtr->bytesPerSector = bs;
223 raidPtr->sectorMask = bs - 1;
224 return (0);
225
226 fail:
227
228 rf_UnconfigureVnodes( raidPtr );
229
230 return (ret);
231 }
232
233
234 /****************************************************************************
235 * set up the data structures describing the spare disks in the array
236 * recall from the above comment that the spare disk descriptors are stored
237 * in row zero, which is specially expanded to hold them.
238 ****************************************************************************/
239 int
240 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
241 RF_ShutdownList_t ** listp;
242 RF_Raid_t * raidPtr;
243 RF_Config_t * cfgPtr;
244 {
245 int i, ret;
246 unsigned int bs;
247 RF_RaidDisk_t *disks;
248 int num_spares_done;
249
250 num_spares_done = 0;
251
252 /* The space for the spares should have already been allocated by
253 * ConfigureDisks() */
254
255 disks = &raidPtr->Disks[0][raidPtr->numCol];
256 for (i = 0; i < raidPtr->numSpare; i++) {
257 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
258 &disks[i], 0, raidPtr->numCol + i);
259 if (ret)
260 goto fail;
261 if (disks[i].status != rf_ds_optimal) {
262 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
263 &cfgPtr->spare_names[i][0]);
264 } else {
265 disks[i].status = rf_ds_spare; /* change status to
266 * spare */
267 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
268 disks[i].devname,
269 (long int) disks[i].numBlocks, disks[i].blockSize,
270 (long int) disks[i].numBlocks *
271 disks[i].blockSize / 1024 / 1024);
272 }
273 num_spares_done++;
274 }
275
276 /* check sizes and block sizes on spare disks */
277 bs = 1 << raidPtr->logBytesPerSector;
278 for (i = 0; i < raidPtr->numSpare; i++) {
279 if (disks[i].blockSize != bs) {
280 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
281 ret = EINVAL;
282 goto fail;
283 }
284 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
285 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
286 disks[i].devname, disks[i].blockSize,
287 (long int) raidPtr->sectorsPerDisk);
288 ret = EINVAL;
289 goto fail;
290 } else
291 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
292 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
293
294 disks[i].numBlocks = raidPtr->sectorsPerDisk;
295 }
296 }
297
298 return (0);
299
300 fail:
301
302 /* Release the hold on the main components. We've failed to allocate
303 * a spare, and since we're failing, we need to free things..
304
305 XXX failing to allocate a spare is *not* that big of a deal...
306 We *can* survive without it, if need be, esp. if we get hot
307 adding working.
308
309 If we don't fail out here, then we need a way to remove this spare...
310 that should be easier to do here than if we are "live"...
311
312 */
313
314 rf_UnconfigureVnodes( raidPtr );
315
316 return (ret);
317 }
318
319 static int
320 rf_AllocDiskStructures(raidPtr, cfgPtr)
321 RF_Raid_t *raidPtr;
322 RF_Config_t *cfgPtr;
323 {
324 RF_RaidDisk_t **disks;
325 int ret;
326 int r;
327
328 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
329 (RF_RaidDisk_t **), raidPtr->cleanupList);
330 if (disks == NULL) {
331 ret = ENOMEM;
332 goto fail;
333 }
334 raidPtr->Disks = disks;
335 /* get space for the device-specific stuff... */
336 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
337 sizeof(struct raidcinfo *), (struct raidcinfo **),
338 raidPtr->cleanupList);
339 if (raidPtr->raid_cinfo == NULL) {
340 ret = ENOMEM;
341 goto fail;
342 }
343
344 for (r = 0; r < raidPtr->numRow; r++) {
345 /* We allocate RF_MAXSPARE on the first row so that we
346 have room to do hot-swapping of spares */
347 RF_CallocAndAdd(disks[r], raidPtr->numCol
348 + ((r == 0) ? RF_MAXSPARE : 0),
349 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
350 raidPtr->cleanupList);
351 if (disks[r] == NULL) {
352 ret = ENOMEM;
353 goto fail;
354 }
355 /* get more space for device specific stuff.. */
356 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
357 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
358 sizeof(struct raidcinfo), (struct raidcinfo *),
359 raidPtr->cleanupList);
360 if (raidPtr->raid_cinfo[r] == NULL) {
361 ret = ENOMEM;
362 goto fail;
363 }
364 }
365 return(0);
366 fail:
367 rf_UnconfigureVnodes( raidPtr );
368
369 return(ret);
370 }
371
372
373 /* configure a single disk during auto-configuration at boot */
374 int
375 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
376 RF_Raid_t *raidPtr;
377 RF_Config_t *cfgPtr;
378 RF_AutoConfig_t *auto_config;
379 {
380 RF_RaidDisk_t **disks;
381 RF_RaidDisk_t *diskPtr;
382 RF_RowCol_t r, c;
383 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
384 int bs, ret;
385 int numFailuresThisRow;
386 int force;
387 RF_AutoConfig_t *ac;
388 int parity_good;
389 int mod_counter;
390 int mod_counter_found;
391
392 #if DEBUG
393 printf("Starting autoconfiguration of RAID set...\n");
394 #endif
395 force = cfgPtr->force;
396
397 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
398 if (ret)
399 goto fail;
400
401 disks = raidPtr->Disks;
402
403 /* assume the parity will be fine.. */
404 parity_good = RF_RAID_CLEAN;
405
406 /* Check for mod_counters that are too low */
407 mod_counter_found = 0;
408 mod_counter = 0;
409 ac = auto_config;
410 while(ac!=NULL) {
411 if (mod_counter_found==0) {
412 mod_counter = ac->clabel->mod_counter;
413 mod_counter_found = 1;
414 } else {
415 if (ac->clabel->mod_counter > mod_counter) {
416 mod_counter = ac->clabel->mod_counter;
417 }
418 }
419 ac->flag = 0; /* clear the general purpose flag */
420 ac = ac->next;
421 }
422
423 bs = 0;
424 for (r = 0; r < raidPtr->numRow; r++) {
425 numFailuresThisRow = 0;
426 for (c = 0; c < raidPtr->numCol; c++) {
427 diskPtr = &disks[r][c];
428
429 /* find this row/col in the autoconfig */
430 #if DEBUG
431 printf("Looking for %d,%d in autoconfig\n",r,c);
432 #endif
433 ac = auto_config;
434 while(ac!=NULL) {
435 if (ac->clabel==NULL) {
436 /* big-time bad news. */
437 goto fail;
438 }
439 if ((ac->clabel->row == r) &&
440 (ac->clabel->column == c) &&
441 (ac->clabel->mod_counter == mod_counter)) {
442 /* it's this one... */
443 /* flag it as 'used', so we don't
444 free it later. */
445 ac->flag = 1;
446 #if DEBUG
447 printf("Found: %s at %d,%d\n",
448 ac->devname,r,c);
449 #endif
450
451 break;
452 }
453 ac=ac->next;
454 }
455
456 if (ac==NULL) {
457 /* we didn't find an exact match with a
458 correct mod_counter above... can we
459 find one with an incorrect mod_counter
460 to use instead? (this one, if we find
461 it, will be marked as failed once the
462 set configures)
463 */
464
465 ac = auto_config;
466 while(ac!=NULL) {
467 if (ac->clabel==NULL) {
468 /* big-time bad news. */
469 goto fail;
470 }
471 if ((ac->clabel->row == r) &&
472 (ac->clabel->column == c)) {
473 /* it's this one...
474 flag it as 'used', so we
475 don't free it later. */
476 ac->flag = 1;
477 #if DEBUG
478 printf("Found(low mod_counter): %s at %d,%d\n",
479 ac->devname,r,c);
480 #endif
481
482 break;
483 }
484 ac=ac->next;
485 }
486 }
487
488
489
490 if (ac!=NULL) {
491 /* Found it. Configure it.. */
492 diskPtr->blockSize = ac->clabel->blockSize;
493 diskPtr->numBlocks = ac->clabel->numBlocks;
494 /* Note: rf_protectedSectors is already
495 factored into numBlocks here */
496 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
497
498 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
499 ac->clabel, sizeof(*ac->clabel));
500 sprintf(diskPtr->devname, "/dev/%s",
501 ac->devname);
502
503 /* note the fact that this component was
504 autoconfigured. You'll need this info
505 later. Trust me :) */
506 diskPtr->auto_configured = 1;
507
508 /*
509 * we allow the user to specify that
510 * only a fraction of the disks should
511 * be used this is just for debug: it
512 * speeds up the parity scan
513 */
514
515 diskPtr->numBlocks = diskPtr->numBlocks *
516 rf_sizePercentage / 100;
517
518 /* XXX these will get set multiple times,
519 but since we're autoconfiguring, they'd
520 better be always the same each time!
521 If not, this is the least of your worries */
522
523 bs = diskPtr->blockSize;
524 min_numblks = diskPtr->numBlocks;
525
526 /* this gets done multiple times, but that's
527 fine -- the serial number will be the same
528 for all components, guaranteed */
529 raidPtr->serial_number =
530 ac->clabel->serial_number;
531 /* check the last time the label
532 was modified */
533 if (ac->clabel->mod_counter !=
534 mod_counter) {
535 /* Even though we've filled in all
536 of the above, we don't trust
537 this component since it's
538 modification counter is not
539 in sync with the rest, and we really
540 consider it to be failed. */
541 disks[r][c].status = rf_ds_failed;
542 numFailuresThisRow++;
543 } else {
544 if (ac->clabel->clean !=
545 RF_RAID_CLEAN) {
546 parity_good = RF_RAID_DIRTY;
547 }
548 }
549 } else {
550 /* Didn't find it at all!!
551 Component must really be dead */
552 disks[r][c].status = rf_ds_failed;
553 sprintf(disks[r][c].devname,"component%d",
554 r * raidPtr->numCol + c);
555 numFailuresThisRow++;
556 }
557 }
558 /* XXX fix for n-fault tolerant */
559 /* XXX this should probably check to see how many failures
560 we can handle for this configuration! */
561 if (numFailuresThisRow > 0)
562 raidPtr->status[r] = rf_rs_degraded;
563 }
564
565 /* close the device for the ones that didn't get used */
566
567 ac = auto_config;
568 while(ac!=NULL) {
569 if (ac->flag == 0) {
570 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
571 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
572 vput(ac->vp);
573 ac->vp = NULL;
574 #if DEBUG
575 printf("Released %s from auto-config set.\n",
576 ac->devname);
577 #endif
578 }
579 ac = ac->next;
580 }
581
582 raidPtr->mod_counter = mod_counter;
583
584 /* note the state of the parity, if any */
585 raidPtr->parity_good = parity_good;
586 raidPtr->sectorsPerDisk = min_numblks;
587 raidPtr->logBytesPerSector = ffs(bs) - 1;
588 raidPtr->bytesPerSector = bs;
589 raidPtr->sectorMask = bs - 1;
590 return (0);
591
592 fail:
593
594 rf_UnconfigureVnodes( raidPtr );
595
596 return (ret);
597
598 }
599
600 /* configure a single disk in the array */
601 int
602 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
603 RF_Raid_t *raidPtr;
604 char *buf;
605 RF_RaidDisk_t *diskPtr;
606 RF_RowCol_t row;
607 RF_RowCol_t col;
608 {
609 char *p;
610 int retcode;
611
612 struct partinfo dpart;
613 struct vnode *vp;
614 struct vattr va;
615 struct proc *proc;
616 int error;
617
618 retcode = 0;
619 p = rf_find_non_white(buf);
620 if (p[strlen(p) - 1] == '\n') {
621 /* strip off the newline */
622 p[strlen(p) - 1] = '\0';
623 }
624 (void) strcpy(diskPtr->devname, p);
625
626 proc = raidPtr->engine_thread;
627
628 /* Let's start by claiming the component is fine and well... */
629 diskPtr->status = rf_ds_optimal;
630
631 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
632
633 error = raidlookup(diskPtr->devname, proc, &vp);
634 if (error) {
635 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
636 if (error == ENXIO) {
637 /* the component isn't there... must be dead :-( */
638 diskPtr->status = rf_ds_failed;
639 } else {
640 return (error);
641 }
642 }
643 if (diskPtr->status == rf_ds_optimal) {
644
645 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
646 return (error);
647 }
648 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
649 FREAD, proc->p_ucred, proc);
650 if (error) {
651 return (error);
652 }
653
654 diskPtr->blockSize = dpart.disklab->d_secsize;
655
656 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
657 diskPtr->partitionSize = dpart.part->p_size;
658
659 raidPtr->raid_cinfo[row][col].ci_vp = vp;
660
661 /* This component was not automatically configured */
662 diskPtr->auto_configured = 0;
663
664 /* we allow the user to specify that only a fraction of the
665 * disks should be used this is just for debug: it speeds up
666 * the parity scan */
667 diskPtr->numBlocks = diskPtr->numBlocks *
668 rf_sizePercentage / 100;
669 }
670 return (0);
671 }
672
673 static void
674 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
675 RF_Raid_t *raidPtr;
676 int row;
677 int column;
678 char *dev_name;
679 RF_ComponentLabel_t *ci_label;
680 {
681
682 printf("raid%d: Component %s being configured at row: %d col: %d\n",
683 raidPtr->raidid, dev_name, row, column );
684 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
685 ci_label->row, ci_label->column,
686 ci_label->num_rows, ci_label->num_columns);
687 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
688 ci_label->version, ci_label->serial_number,
689 ci_label->mod_counter);
690 printf(" Clean: %s Status: %d\n",
691 ci_label->clean ? "Yes" : "No", ci_label->status );
692 }
693
694 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
695 serial_number, mod_counter )
696 RF_Raid_t *raidPtr;
697 int row;
698 int column;
699 char *dev_name;
700 RF_ComponentLabel_t *ci_label;
701 int serial_number;
702 int mod_counter;
703 {
704 int fatal_error = 0;
705
706 if (serial_number != ci_label->serial_number) {
707 printf("%s has a different serial number: %d %d\n",
708 dev_name, serial_number, ci_label->serial_number);
709 fatal_error = 1;
710 }
711 if (mod_counter != ci_label->mod_counter) {
712 printf("%s has a different modfication count: %d %d\n",
713 dev_name, mod_counter, ci_label->mod_counter);
714 }
715
716 if (row != ci_label->row) {
717 printf("Row out of alignment for: %s\n", dev_name);
718 fatal_error = 1;
719 }
720 if (column != ci_label->column) {
721 printf("Column out of alignment for: %s\n", dev_name);
722 fatal_error = 1;
723 }
724 if (raidPtr->numRow != ci_label->num_rows) {
725 printf("Number of rows do not match for: %s\n", dev_name);
726 fatal_error = 1;
727 }
728 if (raidPtr->numCol != ci_label->num_columns) {
729 printf("Number of columns do not match for: %s\n", dev_name);
730 fatal_error = 1;
731 }
732 if (ci_label->clean == 0) {
733 /* it's not clean, but that's not fatal */
734 printf("%s is not clean!\n", dev_name);
735 }
736 return(fatal_error);
737 }
738
739
740 /*
741
742 rf_CheckLabels() - check all the component labels for consistency.
743 Return an error if there is anything major amiss.
744
745 */
746
747 int
748 rf_CheckLabels( raidPtr, cfgPtr )
749 RF_Raid_t *raidPtr;
750 RF_Config_t *cfgPtr;
751 {
752 int r,c;
753 char *dev_name;
754 RF_ComponentLabel_t *ci_label;
755 int serial_number = 0;
756 int mod_number = 0;
757 int fatal_error = 0;
758 int mod_values[4];
759 int mod_count[4];
760 int ser_values[4];
761 int ser_count[4];
762 int num_ser;
763 int num_mod;
764 int i;
765 int found;
766 int hosed_row;
767 int hosed_column;
768 int too_fatal;
769 int parity_good;
770 int force;
771
772 hosed_row = -1;
773 hosed_column = -1;
774 too_fatal = 0;
775 force = cfgPtr->force;
776
777 /*
778 We're going to try to be a little intelligent here. If one
779 component's label is bogus, and we can identify that it's the
780 *only* one that's gone, we'll mark it as "failed" and allow
781 the configuration to proceed. This will be the *only* case
782 that we'll proceed if there would be (otherwise) fatal errors.
783
784 Basically we simply keep a count of how many components had
785 what serial number. If all but one agree, we simply mark
786 the disagreeing component as being failed, and allow
787 things to come up "normally".
788
789 We do this first for serial numbers, and then for "mod_counter".
790
791 */
792
793 num_ser = 0;
794 num_mod = 0;
795 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
796 for (c = 0; c < raidPtr->numCol; c++) {
797 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
798 found=0;
799 for(i=0;i<num_ser;i++) {
800 if (ser_values[i] == ci_label->serial_number) {
801 ser_count[i]++;
802 found=1;
803 break;
804 }
805 }
806 if (!found) {
807 ser_values[num_ser] = ci_label->serial_number;
808 ser_count[num_ser] = 1;
809 num_ser++;
810 if (num_ser>2) {
811 fatal_error = 1;
812 break;
813 }
814 }
815 found=0;
816 for(i=0;i<num_mod;i++) {
817 if (mod_values[i] == ci_label->mod_counter) {
818 mod_count[i]++;
819 found=1;
820 break;
821 }
822 }
823 if (!found) {
824 mod_values[num_mod] = ci_label->mod_counter;
825 mod_count[num_mod] = 1;
826 num_mod++;
827 if (num_mod>2) {
828 fatal_error = 1;
829 break;
830 }
831 }
832 }
833 }
834 #if DEBUG
835 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
836 for(i=0;i<num_ser;i++) {
837 printf("%d %d\n", ser_values[i], ser_count[i]);
838 }
839 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
840 for(i=0;i<num_mod;i++) {
841 printf("%d %d\n", mod_values[i], mod_count[i]);
842 }
843 #endif
844 serial_number = ser_values[0];
845 if (num_ser == 2) {
846 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
847 /* Locate the maverick component */
848 if (ser_count[1] > ser_count[0]) {
849 serial_number = ser_values[1];
850 }
851 for (r = 0; r < raidPtr->numRow; r++) {
852 for (c = 0; c < raidPtr->numCol; c++) {
853 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
854 if (serial_number !=
855 ci_label->serial_number) {
856 hosed_row = r;
857 hosed_column = c;
858 break;
859 }
860 }
861 }
862 printf("Hosed component: %s\n",
863 &cfgPtr->devnames[hosed_row][hosed_column][0]);
864 if (!force) {
865 /* we'll fail this component, as if there are
866 other major errors, we arn't forcing things
867 and we'll abort the config anyways */
868 raidPtr->Disks[hosed_row][hosed_column].status
869 = rf_ds_failed;
870 raidPtr->numFailures++;
871 raidPtr->status[hosed_row] = rf_rs_degraded;
872 }
873 } else {
874 too_fatal = 1;
875 }
876 if (cfgPtr->parityConfig == '0') {
877 /* We've identified two different serial numbers.
878 RAID 0 can't cope with that, so we'll punt */
879 too_fatal = 1;
880 }
881
882 }
883
884 /* record the serial number for later. If we bail later, setting
885 this doesn't matter, otherwise we've got the best guess at the
886 correct serial number */
887 raidPtr->serial_number = serial_number;
888
889 mod_number = mod_values[0];
890 if (num_mod == 2) {
891 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
892 /* Locate the maverick component */
893 if (mod_count[1] > mod_count[0]) {
894 mod_number = mod_values[1];
895 } else if (mod_count[1] < mod_count[0]) {
896 mod_number = mod_values[0];
897 } else {
898 /* counts of different modification values
899 are the same. Assume greater value is
900 the correct one, all other things
901 considered */
902 if (mod_values[0] > mod_values[1]) {
903 mod_number = mod_values[0];
904 } else {
905 mod_number = mod_values[1];
906 }
907
908 }
909 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
910 for (c = 0; c < raidPtr->numCol; c++) {
911 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
912 if (mod_number !=
913 ci_label->mod_counter) {
914 if ( ( hosed_row == r ) &&
915 ( hosed_column == c )) {
916 /* same one. Can
917 deal with it. */
918 } else {
919 hosed_row = r;
920 hosed_column = c;
921 if (num_ser != 1) {
922 too_fatal = 1;
923 break;
924 }
925 }
926 }
927 }
928 }
929 printf("Hosed component: %s\n",
930 &cfgPtr->devnames[hosed_row][hosed_column][0]);
931 if (!force) {
932 /* we'll fail this component, as if there are
933 other major errors, we arn't forcing things
934 and we'll abort the config anyways */
935 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
936 raidPtr->Disks[hosed_row][hosed_column].status
937 = rf_ds_failed;
938 raidPtr->numFailures++;
939 raidPtr->status[hosed_row] = rf_rs_degraded;
940 }
941 }
942 } else {
943 too_fatal = 1;
944 }
945 if (cfgPtr->parityConfig == '0') {
946 /* We've identified two different mod counters.
947 RAID 0 can't cope with that, so we'll punt */
948 too_fatal = 1;
949 }
950 }
951
952 raidPtr->mod_counter = mod_number;
953
954 if (too_fatal) {
955 /* we've had both a serial number mismatch, and a mod_counter
956 mismatch -- and they involved two different components!!
957 Bail -- make things fail so that the user must force
958 the issue... */
959 hosed_row = -1;
960 hosed_column = -1;
961 }
962
963 if (num_ser > 2) {
964 printf("raid%d: Too many different serial numbers!\n",
965 raidPtr->raidid);
966 }
967
968 if (num_mod > 2) {
969 printf("raid%d: Too many different mod counters!\n",
970 raidPtr->raidid);
971 }
972
973 /* we start by assuming the parity will be good, and flee from
974 that notion at the slightest sign of trouble */
975
976 parity_good = RF_RAID_CLEAN;
977 for (r = 0; r < raidPtr->numRow; r++) {
978 for (c = 0; c < raidPtr->numCol; c++) {
979 dev_name = &cfgPtr->devnames[r][c][0];
980 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
981
982 if ((r == hosed_row) && (c == hosed_column)) {
983 printf("raid%d: Ignoring %s\n",
984 raidPtr->raidid, dev_name);
985 } else {
986 rf_print_label_status( raidPtr, r, c,
987 dev_name, ci_label );
988 if (rf_check_label_vitals( raidPtr, r, c,
989 dev_name, ci_label,
990 serial_number,
991 mod_number )) {
992 fatal_error = 1;
993 }
994 if (ci_label->clean != RF_RAID_CLEAN) {
995 parity_good = RF_RAID_DIRTY;
996 }
997 }
998 }
999 }
1000 if (fatal_error) {
1001 parity_good = RF_RAID_DIRTY;
1002 }
1003
1004 /* we note the state of the parity */
1005 raidPtr->parity_good = parity_good;
1006
1007 return(fatal_error);
1008 }
1009
1010 int
1011 rf_add_hot_spare(raidPtr, sparePtr)
1012 RF_Raid_t *raidPtr;
1013 RF_SingleComponent_t *sparePtr;
1014 {
1015 RF_RaidDisk_t *disks;
1016 RF_DiskQueue_t *spareQueues;
1017 int ret;
1018 unsigned int bs;
1019 int spare_number;
1020
1021 #if 0
1022 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
1023 printf("Num col: %d\n",raidPtr->numCol);
1024 #endif
1025 if (raidPtr->numSpare >= RF_MAXSPARE) {
1026 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1027 return(EINVAL);
1028 }
1029
1030 RF_LOCK_MUTEX(raidPtr->mutex);
1031
1032 /* the beginning of the spares... */
1033 disks = &raidPtr->Disks[0][raidPtr->numCol];
1034
1035 spare_number = raidPtr->numSpare;
1036
1037 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1038 &disks[spare_number], 0,
1039 raidPtr->numCol + spare_number);
1040
1041 if (ret)
1042 goto fail;
1043 if (disks[spare_number].status != rf_ds_optimal) {
1044 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1045 sparePtr->component_name);
1046 ret=EINVAL;
1047 goto fail;
1048 } else {
1049 disks[spare_number].status = rf_ds_spare;
1050 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1051 disks[spare_number].devname,
1052 (long int) disks[spare_number].numBlocks,
1053 disks[spare_number].blockSize,
1054 (long int) disks[spare_number].numBlocks *
1055 disks[spare_number].blockSize / 1024 / 1024);
1056 }
1057
1058
1059 /* check sizes and block sizes on the spare disk */
1060 bs = 1 << raidPtr->logBytesPerSector;
1061 if (disks[spare_number].blockSize != bs) {
1062 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1063 ret = EINVAL;
1064 goto fail;
1065 }
1066 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1067 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1068 disks[spare_number].devname,
1069 disks[spare_number].blockSize,
1070 (long int) raidPtr->sectorsPerDisk);
1071 ret = EINVAL;
1072 goto fail;
1073 } else {
1074 if (disks[spare_number].numBlocks >
1075 raidPtr->sectorsPerDisk) {
1076 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1077 (long int) raidPtr->sectorsPerDisk);
1078
1079 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1080 }
1081 }
1082
1083 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1084 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1085 0, raidPtr->numCol + spare_number,
1086 raidPtr->qType,
1087 raidPtr->sectorsPerDisk,
1088 raidPtr->maxOutstanding,
1089 &raidPtr->shutdownList,
1090 raidPtr->cleanupList);
1091
1092
1093 raidPtr->numSpare++;
1094 RF_UNLOCK_MUTEX(raidPtr->mutex);
1095 return (0);
1096
1097 fail:
1098 RF_UNLOCK_MUTEX(raidPtr->mutex);
1099 return(ret);
1100 }
1101
1102 int
1103 rf_remove_hot_spare(raidPtr,sparePtr)
1104 RF_Raid_t *raidPtr;
1105 RF_SingleComponent_t *sparePtr;
1106 {
1107 int spare_number;
1108
1109
1110 if (raidPtr->numSpare==0) {
1111 printf("No spares to remove!\n");
1112 return(EINVAL);
1113 }
1114
1115 spare_number = sparePtr->column;
1116
1117 return(EINVAL); /* XXX not implemented yet */
1118 #if 0
1119 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1120 return(EINVAL);
1121 }
1122
1123 /* verify that this spare isn't in use... */
1124
1125
1126
1127
1128 /* it's gone.. */
1129
1130 raidPtr->numSpare--;
1131
1132 return(0);
1133 #endif
1134 }
1135
1136
1137 int
1138 rf_delete_component(raidPtr,component)
1139 RF_Raid_t *raidPtr;
1140 RF_SingleComponent_t *component;
1141 {
1142 RF_RaidDisk_t *disks;
1143
1144 if ((component->row < 0) ||
1145 (component->row >= raidPtr->numRow) ||
1146 (component->column < 0) ||
1147 (component->column >= raidPtr->numCol)) {
1148 return(EINVAL);
1149 }
1150
1151 disks = &raidPtr->Disks[component->row][component->column];
1152
1153 /* 1. This component must be marked as 'failed' */
1154
1155 return(EINVAL); /* Not implemented yet. */
1156 }
1157
1158 int
1159 rf_incorporate_hot_spare(raidPtr,component)
1160 RF_Raid_t *raidPtr;
1161 RF_SingleComponent_t *component;
1162 {
1163
1164 /* Issues here include how to 'move' this in if there is IO
1165 taking place (e.g. component queues and such) */
1166
1167 return(EINVAL); /* Not implemented yet. */
1168 }
1169