rf_disks.c revision 1.32 1 /* $NetBSD: rf_disks.c,v 1.32 2000/09/08 01:36:35 oster Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86
87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
89 RF_ComponentLabel_t *);
90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *, int, int );
92
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * The number in the name is the total argument count (format included).
 *
 * The body is wrapped in do { } while (0) so that each macro expands to
 * exactly one statement.  With a bare "if (...) printf(...)" expansion, an
 * "else" written after the macro call would silently attach to the macro's
 * hidden "if" instead of the caller's.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
95
96 /**************************************************************************
97 *
98 * initialize the disks comprising the array
99 *
100 * We want the spare disks to have regular row,col numbers so that we can
101 * easily substitue a spare for a failed disk. But, the driver code assumes
102 * throughout that the array contains numRow by numCol _non-spare_ disks, so
103 * it's not clear how to fit in the spares. This is an unfortunate holdover
104 * from raidSim. The quick and dirty fix is to make row zero bigger than the
105 * rest, and put all the spares in it. This probably needs to get changed
106 * eventually.
107 *
108 **************************************************************************/
109
110 int
111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
112 RF_ShutdownList_t **listp;
113 RF_Raid_t *raidPtr;
114 RF_Config_t *cfgPtr;
115 {
116 RF_RaidDisk_t **disks;
117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 RF_RowCol_t r, c;
119 int bs, ret;
120 unsigned i, count, foundone = 0, numFailuresThisRow;
121 int force;
122
123 force = cfgPtr->force;
124
125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
126 if (ret)
127 goto fail;
128
129 disks = raidPtr->Disks;
130
131 for (r = 0; r < raidPtr->numRow; r++) {
132 numFailuresThisRow = 0;
133 for (c = 0; c < raidPtr->numCol; c++) {
134 ret = rf_ConfigureDisk(raidPtr,
135 &cfgPtr->devnames[r][c][0],
136 &disks[r][c], r, c);
137
138 if (ret)
139 goto fail;
140
141 if (disks[r][c].status == rf_ds_optimal) {
142 raidread_component_label(
143 raidPtr->raid_cinfo[r][c].ci_dev,
144 raidPtr->raid_cinfo[r][c].ci_vp,
145 &raidPtr->raid_cinfo[r][c].ci_label);
146 }
147
148 if (disks[r][c].status != rf_ds_optimal) {
149 numFailuresThisRow++;
150 } else {
151 if (disks[r][c].numBlocks < min_numblks)
152 min_numblks = disks[r][c].numBlocks;
153 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
154 r, c, disks[r][c].devname,
155 (long int) disks[r][c].numBlocks,
156 disks[r][c].blockSize,
157 (long int) disks[r][c].numBlocks *
158 disks[r][c].blockSize / 1024 / 1024);
159 }
160 }
161 /* XXX fix for n-fault tolerant */
162 /* XXX this should probably check to see how many failures
163 we can handle for this configuration! */
164 if (numFailuresThisRow > 0)
165 raidPtr->status[r] = rf_rs_degraded;
166 }
167
168 /* all disks must be the same size & have the same block size, bs must
169 * be a power of 2 */
170 bs = 0;
171 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
172 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
173 if (disks[r][c].status == rf_ds_optimal) {
174 bs = disks[r][c].blockSize;
175 foundone = 1;
176 }
177 }
178 }
179 if (!foundone) {
180 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
181 ret = EINVAL;
182 goto fail;
183 }
184 for (count = 0, i = 1; i; i <<= 1)
185 if (bs & i)
186 count++;
187 if (count != 1) {
188 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
189 ret = EINVAL;
190 goto fail;
191 }
192
193 if (rf_CheckLabels( raidPtr, cfgPtr )) {
194 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
195 if (force != 0) {
196 printf("raid%d: Fatal errors being ignored.\n",
197 raidPtr->raidid);
198 } else {
199 ret = EINVAL;
200 goto fail;
201 }
202 }
203
204 for (r = 0; r < raidPtr->numRow; r++) {
205 for (c = 0; c < raidPtr->numCol; c++) {
206 if (disks[r][c].status == rf_ds_optimal) {
207 if (disks[r][c].blockSize != bs) {
208 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
209 ret = EINVAL;
210 goto fail;
211 }
212 if (disks[r][c].numBlocks != min_numblks) {
213 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
214 r, c, (int) min_numblks);
215 disks[r][c].numBlocks = min_numblks;
216 }
217 }
218 }
219 }
220
221 raidPtr->sectorsPerDisk = min_numblks;
222 raidPtr->logBytesPerSector = ffs(bs) - 1;
223 raidPtr->bytesPerSector = bs;
224 raidPtr->sectorMask = bs - 1;
225 return (0);
226
227 fail:
228
229 rf_UnconfigureVnodes( raidPtr );
230
231 return (ret);
232 }
233
234
235 /****************************************************************************
236 * set up the data structures describing the spare disks in the array
237 * recall from the above comment that the spare disk descriptors are stored
238 * in row zero, which is specially expanded to hold them.
239 ****************************************************************************/
240 int
241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
242 RF_ShutdownList_t ** listp;
243 RF_Raid_t * raidPtr;
244 RF_Config_t * cfgPtr;
245 {
246 int i, ret;
247 unsigned int bs;
248 RF_RaidDisk_t *disks;
249 int num_spares_done;
250
251 num_spares_done = 0;
252
253 /* The space for the spares should have already been allocated by
254 * ConfigureDisks() */
255
256 disks = &raidPtr->Disks[0][raidPtr->numCol];
257 for (i = 0; i < raidPtr->numSpare; i++) {
258 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
259 &disks[i], 0, raidPtr->numCol + i);
260 if (ret)
261 goto fail;
262 if (disks[i].status != rf_ds_optimal) {
263 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
264 &cfgPtr->spare_names[i][0]);
265 } else {
266 disks[i].status = rf_ds_spare; /* change status to
267 * spare */
268 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
269 disks[i].devname,
270 (long int) disks[i].numBlocks, disks[i].blockSize,
271 (long int) disks[i].numBlocks *
272 disks[i].blockSize / 1024 / 1024);
273 }
274 num_spares_done++;
275 }
276
277 /* check sizes and block sizes on spare disks */
278 bs = 1 << raidPtr->logBytesPerSector;
279 for (i = 0; i < raidPtr->numSpare; i++) {
280 if (disks[i].blockSize != bs) {
281 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
282 ret = EINVAL;
283 goto fail;
284 }
285 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
286 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
287 disks[i].devname, disks[i].blockSize,
288 (long int) raidPtr->sectorsPerDisk);
289 ret = EINVAL;
290 goto fail;
291 } else
292 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
293 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
294
295 disks[i].numBlocks = raidPtr->sectorsPerDisk;
296 }
297 }
298
299 return (0);
300
301 fail:
302
303 /* Release the hold on the main components. We've failed to allocate
304 * a spare, and since we're failing, we need to free things..
305
306 XXX failing to allocate a spare is *not* that big of a deal...
307 We *can* survive without it, if need be, esp. if we get hot
308 adding working.
309
310 If we don't fail out here, then we need a way to remove this spare...
311 that should be easier to do here than if we are "live"...
312
313 */
314
315 rf_UnconfigureVnodes( raidPtr );
316
317 return (ret);
318 }
319
320 static int
321 rf_AllocDiskStructures(raidPtr, cfgPtr)
322 RF_Raid_t *raidPtr;
323 RF_Config_t *cfgPtr;
324 {
325 RF_RaidDisk_t **disks;
326 int ret;
327 int r;
328
329 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
330 (RF_RaidDisk_t **), raidPtr->cleanupList);
331 if (disks == NULL) {
332 ret = ENOMEM;
333 goto fail;
334 }
335 raidPtr->Disks = disks;
336 /* get space for the device-specific stuff... */
337 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
338 sizeof(struct raidcinfo *), (struct raidcinfo **),
339 raidPtr->cleanupList);
340 if (raidPtr->raid_cinfo == NULL) {
341 ret = ENOMEM;
342 goto fail;
343 }
344
345 for (r = 0; r < raidPtr->numRow; r++) {
346 /* We allocate RF_MAXSPARE on the first row so that we
347 have room to do hot-swapping of spares */
348 RF_CallocAndAdd(disks[r], raidPtr->numCol
349 + ((r == 0) ? RF_MAXSPARE : 0),
350 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
351 raidPtr->cleanupList);
352 if (disks[r] == NULL) {
353 ret = ENOMEM;
354 goto fail;
355 }
356 /* get more space for device specific stuff.. */
357 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
358 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
359 sizeof(struct raidcinfo), (struct raidcinfo *),
360 raidPtr->cleanupList);
361 if (raidPtr->raid_cinfo[r] == NULL) {
362 ret = ENOMEM;
363 goto fail;
364 }
365 }
366 return(0);
367 fail:
368 rf_UnconfigureVnodes( raidPtr );
369
370 return(ret);
371 }
372
373
374 /* configure a single disk during auto-configuration at boot */
375 int
376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
377 RF_Raid_t *raidPtr;
378 RF_Config_t *cfgPtr;
379 RF_AutoConfig_t *auto_config;
380 {
381 RF_RaidDisk_t **disks;
382 RF_RaidDisk_t *diskPtr;
383 RF_RowCol_t r, c;
384 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
385 int bs, ret;
386 int numFailuresThisRow;
387 int force;
388 RF_AutoConfig_t *ac;
389 int parity_good;
390 int mod_counter;
391 int mod_counter_found;
392
393 #if DEBUG
394 printf("Starting autoconfiguration of RAID set...\n");
395 #endif
396 force = cfgPtr->force;
397
398 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
399 if (ret)
400 goto fail;
401
402 disks = raidPtr->Disks;
403
404 /* assume the parity will be fine.. */
405 parity_good = RF_RAID_CLEAN;
406
407 /* Check for mod_counters that are too low */
408 mod_counter_found = 0;
409 ac = auto_config;
410 while(ac!=NULL) {
411 if (mod_counter_found==0) {
412 mod_counter = ac->clabel->mod_counter;
413 mod_counter_found = 1;
414 } else {
415 if (ac->clabel->mod_counter > mod_counter) {
416 mod_counter = ac->clabel->mod_counter;
417 }
418 }
419 ac->flag = 0; /* clear the general purpose flag */
420 ac = ac->next;
421 }
422
423 for (r = 0; r < raidPtr->numRow; r++) {
424 numFailuresThisRow = 0;
425 for (c = 0; c < raidPtr->numCol; c++) {
426 diskPtr = &disks[r][c];
427
428 /* find this row/col in the autoconfig */
429 #if DEBUG
430 printf("Looking for %d,%d in autoconfig\n",r,c);
431 #endif
432 ac = auto_config;
433 while(ac!=NULL) {
434 if (ac->clabel==NULL) {
435 /* big-time bad news. */
436 goto fail;
437 }
438 if ((ac->clabel->row == r) &&
439 (ac->clabel->column == c) &&
440 (ac->clabel->mod_counter == mod_counter)) {
441 /* it's this one... */
442 /* flag it as 'used', so we don't
443 free it later. */
444 ac->flag = 1;
445 #if DEBUG
446 printf("Found: %s at %d,%d\n",
447 ac->devname,r,c);
448 #endif
449
450 break;
451 }
452 ac=ac->next;
453 }
454
455 if (ac==NULL) {
456 /* we didn't find an exact match with a
457 correct mod_counter above... can we
458 find one with an incorrect mod_counter
459 to use instead? (this one, if we find
460 it, will be marked as failed once the
461 set configures)
462 */
463
464 ac = auto_config;
465 while(ac!=NULL) {
466 if (ac->clabel==NULL) {
467 /* big-time bad news. */
468 goto fail;
469 }
470 if ((ac->clabel->row == r) &&
471 (ac->clabel->column == c)) {
472 /* it's this one...
473 flag it as 'used', so we
474 don't free it later. */
475 ac->flag = 1;
476 #if DEBUG
477 printf("Found(low mod_counter): %s at %d,%d\n",
478 ac->devname,r,c);
479 #endif
480
481 break;
482 }
483 ac=ac->next;
484 }
485 }
486
487
488
489 if (ac!=NULL) {
490 /* Found it. Configure it.. */
491 diskPtr->blockSize = ac->clabel->blockSize;
492 diskPtr->numBlocks = ac->clabel->numBlocks;
493 /* Note: rf_protectedSectors is already
494 factored into numBlocks here */
495 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
496 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
497
498 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
499 ac->clabel, sizeof(*ac->clabel));
500 sprintf(diskPtr->devname, "/dev/%s",
501 ac->devname);
502
503 /* note the fact that this component was
504 autoconfigured. You'll need this info
505 later. Trust me :) */
506 diskPtr->auto_configured = 1;
507 diskPtr->dev = ac->dev;
508
509 /*
510 * we allow the user to specify that
511 * only a fraction of the disks should
512 * be used this is just for debug: it
513 * speeds up the parity scan
514 */
515
516 diskPtr->numBlocks = diskPtr->numBlocks *
517 rf_sizePercentage / 100;
518
519 /* XXX these will get set multiple times,
520 but since we're autoconfiguring, they'd
521 better be always the same each time!
522 If not, this is the least of your worries */
523
524 bs = diskPtr->blockSize;
525 min_numblks = diskPtr->numBlocks;
526
527 /* this gets done multiple times, but that's
528 fine -- the serial number will be the same
529 for all components, guaranteed */
530 raidPtr->serial_number =
531 ac->clabel->serial_number;
532 /* check the last time the label
533 was modified */
534 if (ac->clabel->mod_counter !=
535 mod_counter) {
536 /* Even though we've filled in all
537 of the above, we don't trust
538 this component since it's
539 modification counter is not
540 in sync with the rest, and we really
541 consider it to be failed. */
542 disks[r][c].status = rf_ds_failed;
543 numFailuresThisRow++;
544 } else {
545 if (ac->clabel->clean !=
546 RF_RAID_CLEAN) {
547 parity_good = RF_RAID_DIRTY;
548 }
549 }
550 } else {
551 /* Didn't find it at all!!
552 Component must really be dead */
553 disks[r][c].status = rf_ds_failed;
554 sprintf(disks[r][c].devname,"component%d",
555 r * raidPtr->numCol + c);
556 numFailuresThisRow++;
557 }
558 }
559 /* XXX fix for n-fault tolerant */
560 /* XXX this should probably check to see how many failures
561 we can handle for this configuration! */
562 if (numFailuresThisRow > 0)
563 raidPtr->status[r] = rf_rs_degraded;
564 }
565
566 /* close the device for the ones that didn't get used */
567
568 ac = auto_config;
569 while(ac!=NULL) {
570 if (ac->flag == 0) {
571 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
572 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
573 vput(ac->vp);
574 ac->vp = NULL;
575 #if DEBUG
576 printf("Released %s from auto-config set.\n",
577 ac->devname);
578 #endif
579 }
580 ac = ac->next;
581 }
582
583 raidPtr->mod_counter = mod_counter;
584
585 /* note the state of the parity, if any */
586 raidPtr->parity_good = parity_good;
587 raidPtr->sectorsPerDisk = min_numblks;
588 raidPtr->logBytesPerSector = ffs(bs) - 1;
589 raidPtr->bytesPerSector = bs;
590 raidPtr->sectorMask = bs - 1;
591 return (0);
592
593 fail:
594
595 rf_UnconfigureVnodes( raidPtr );
596
597 return (ret);
598
599 }
600
601 /* configure a single disk in the array */
602 int
603 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
604 RF_Raid_t *raidPtr;
605 char *buf;
606 RF_RaidDisk_t *diskPtr;
607 RF_RowCol_t row;
608 RF_RowCol_t col;
609 {
610 char *p;
611 int retcode;
612
613 struct partinfo dpart;
614 struct vnode *vp;
615 struct vattr va;
616 struct proc *proc;
617 int error;
618
619 retcode = 0;
620 p = rf_find_non_white(buf);
621 if (p[strlen(p) - 1] == '\n') {
622 /* strip off the newline */
623 p[strlen(p) - 1] = '\0';
624 }
625 (void) strcpy(diskPtr->devname, p);
626
627 proc = raidPtr->engine_thread;
628
629 /* Let's start by claiming the component is fine and well... */
630 diskPtr->status = rf_ds_optimal;
631
632 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
633 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
634
635 error = raidlookup(diskPtr->devname, proc, &vp);
636 if (error) {
637 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
638 if (error == ENXIO) {
639 /* the component isn't there... must be dead :-( */
640 diskPtr->status = rf_ds_failed;
641 } else {
642 return (error);
643 }
644 }
645 if (diskPtr->status == rf_ds_optimal) {
646
647 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
648 return (error);
649 }
650 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
651 FREAD, proc->p_ucred, proc);
652 if (error) {
653 return (error);
654 }
655
656 diskPtr->blockSize = dpart.disklab->d_secsize;
657
658 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
659 diskPtr->partitionSize = dpart.part->p_size;
660
661 raidPtr->raid_cinfo[row][col].ci_vp = vp;
662 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
663
664 /* This component was not automatically configured */
665 diskPtr->auto_configured = 0;
666 diskPtr->dev = va.va_rdev;
667
668 /* we allow the user to specify that only a fraction of the
669 * disks should be used this is just for debug: it speeds up
670 * the parity scan */
671 diskPtr->numBlocks = diskPtr->numBlocks *
672 rf_sizePercentage / 100;
673 }
674 return (0);
675 }
676
677 static void
678 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
679 RF_Raid_t *raidPtr;
680 int row;
681 int column;
682 char *dev_name;
683 RF_ComponentLabel_t *ci_label;
684 {
685
686 printf("raid%d: Component %s being configured at row: %d col: %d\n",
687 raidPtr->raidid, dev_name, row, column );
688 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
689 ci_label->row, ci_label->column,
690 ci_label->num_rows, ci_label->num_columns);
691 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
692 ci_label->version, ci_label->serial_number,
693 ci_label->mod_counter);
694 printf(" Clean: %s Status: %d\n",
695 ci_label->clean ? "Yes" : "No", ci_label->status );
696 }
697
698 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
699 serial_number, mod_counter )
700 RF_Raid_t *raidPtr;
701 int row;
702 int column;
703 char *dev_name;
704 RF_ComponentLabel_t *ci_label;
705 int serial_number;
706 int mod_counter;
707 {
708 int fatal_error = 0;
709
710 if (serial_number != ci_label->serial_number) {
711 printf("%s has a different serial number: %d %d\n",
712 dev_name, serial_number, ci_label->serial_number);
713 fatal_error = 1;
714 }
715 if (mod_counter != ci_label->mod_counter) {
716 printf("%s has a different modfication count: %d %d\n",
717 dev_name, mod_counter, ci_label->mod_counter);
718 }
719
720 if (row != ci_label->row) {
721 printf("Row out of alignment for: %s\n", dev_name);
722 fatal_error = 1;
723 }
724 if (column != ci_label->column) {
725 printf("Column out of alignment for: %s\n", dev_name);
726 fatal_error = 1;
727 }
728 if (raidPtr->numRow != ci_label->num_rows) {
729 printf("Number of rows do not match for: %s\n", dev_name);
730 fatal_error = 1;
731 }
732 if (raidPtr->numCol != ci_label->num_columns) {
733 printf("Number of columns do not match for: %s\n", dev_name);
734 fatal_error = 1;
735 }
736 if (ci_label->clean == 0) {
737 /* it's not clean, but that's not fatal */
738 printf("%s is not clean!\n", dev_name);
739 }
740 return(fatal_error);
741 }
742
743
744 /*
745
746 rf_CheckLabels() - check all the component labels for consistency.
747 Return an error if there is anything major amiss.
748
749 */
750
751 int
752 rf_CheckLabels( raidPtr, cfgPtr )
753 RF_Raid_t *raidPtr;
754 RF_Config_t *cfgPtr;
755 {
756 int r,c;
757 char *dev_name;
758 RF_ComponentLabel_t *ci_label;
759 int serial_number = 0;
760 int mod_number = 0;
761 int fatal_error = 0;
762 int mod_values[4];
763 int mod_count[4];
764 int ser_values[4];
765 int ser_count[4];
766 int num_ser;
767 int num_mod;
768 int i;
769 int found;
770 int hosed_row;
771 int hosed_column;
772 int too_fatal;
773 int parity_good;
774 int force;
775
776 hosed_row = -1;
777 hosed_column = -1;
778 too_fatal = 0;
779 force = cfgPtr->force;
780
781 /*
782 We're going to try to be a little intelligent here. If one
783 component's label is bogus, and we can identify that it's the
784 *only* one that's gone, we'll mark it as "failed" and allow
785 the configuration to proceed. This will be the *only* case
786 that we'll proceed if there would be (otherwise) fatal errors.
787
788 Basically we simply keep a count of how many components had
789 what serial number. If all but one agree, we simply mark
790 the disagreeing component as being failed, and allow
791 things to come up "normally".
792
793 We do this first for serial numbers, and then for "mod_counter".
794
795 */
796
797 num_ser = 0;
798 num_mod = 0;
799 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
800 for (c = 0; c < raidPtr->numCol; c++) {
801 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
802 found=0;
803 for(i=0;i<num_ser;i++) {
804 if (ser_values[i] == ci_label->serial_number) {
805 ser_count[i]++;
806 found=1;
807 break;
808 }
809 }
810 if (!found) {
811 ser_values[num_ser] = ci_label->serial_number;
812 ser_count[num_ser] = 1;
813 num_ser++;
814 if (num_ser>2) {
815 fatal_error = 1;
816 break;
817 }
818 }
819 found=0;
820 for(i=0;i<num_mod;i++) {
821 if (mod_values[i] == ci_label->mod_counter) {
822 mod_count[i]++;
823 found=1;
824 break;
825 }
826 }
827 if (!found) {
828 mod_values[num_mod] = ci_label->mod_counter;
829 mod_count[num_mod] = 1;
830 num_mod++;
831 if (num_mod>2) {
832 fatal_error = 1;
833 break;
834 }
835 }
836 }
837 }
838 #if DEBUG
839 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
840 for(i=0;i<num_ser;i++) {
841 printf("%d %d\n", ser_values[i], ser_count[i]);
842 }
843 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
844 for(i=0;i<num_mod;i++) {
845 printf("%d %d\n", mod_values[i], mod_count[i]);
846 }
847 #endif
848 serial_number = ser_values[0];
849 if (num_ser == 2) {
850 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
851 /* Locate the maverick component */
852 if (ser_count[1] > ser_count[0]) {
853 serial_number = ser_values[1];
854 }
855 for (r = 0; r < raidPtr->numRow; r++) {
856 for (c = 0; c < raidPtr->numCol; c++) {
857 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
858 if (serial_number !=
859 ci_label->serial_number) {
860 hosed_row = r;
861 hosed_column = c;
862 break;
863 }
864 }
865 }
866 printf("Hosed component: %s\n",
867 &cfgPtr->devnames[hosed_row][hosed_column][0]);
868 if (!force) {
869 /* we'll fail this component, as if there are
870 other major errors, we arn't forcing things
871 and we'll abort the config anyways */
872 raidPtr->Disks[hosed_row][hosed_column].status
873 = rf_ds_failed;
874 raidPtr->numFailures++;
875 raidPtr->status[hosed_row] = rf_rs_degraded;
876 }
877 } else {
878 too_fatal = 1;
879 }
880 if (cfgPtr->parityConfig == '0') {
881 /* We've identified two different serial numbers.
882 RAID 0 can't cope with that, so we'll punt */
883 too_fatal = 1;
884 }
885
886 }
887
888 /* record the serial number for later. If we bail later, setting
889 this doesn't matter, otherwise we've got the best guess at the
890 correct serial number */
891 raidPtr->serial_number = serial_number;
892
893 mod_number = mod_values[0];
894 if (num_mod == 2) {
895 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
896 /* Locate the maverick component */
897 if (mod_count[1] > mod_count[0]) {
898 mod_number = mod_values[1];
899 } else if (mod_count[1] < mod_count[0]) {
900 mod_number = mod_values[0];
901 } else {
902 /* counts of different modification values
903 are the same. Assume greater value is
904 the correct one, all other things
905 considered */
906 if (mod_values[0] > mod_values[1]) {
907 mod_number = mod_values[0];
908 } else {
909 mod_number = mod_values[1];
910 }
911
912 }
913 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
914 for (c = 0; c < raidPtr->numCol; c++) {
915 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
916 if (mod_number !=
917 ci_label->mod_counter) {
918 if ( ( hosed_row == r ) &&
919 ( hosed_column == c )) {
920 /* same one. Can
921 deal with it. */
922 } else {
923 hosed_row = r;
924 hosed_column = c;
925 if (num_ser != 1) {
926 too_fatal = 1;
927 break;
928 }
929 }
930 }
931 }
932 }
933 printf("Hosed component: %s\n",
934 &cfgPtr->devnames[hosed_row][hosed_column][0]);
935 if (!force) {
936 /* we'll fail this component, as if there are
937 other major errors, we arn't forcing things
938 and we'll abort the config anyways */
939 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
940 raidPtr->Disks[hosed_row][hosed_column].status
941 = rf_ds_failed;
942 raidPtr->numFailures++;
943 raidPtr->status[hosed_row] = rf_rs_degraded;
944 }
945 }
946 } else {
947 too_fatal = 1;
948 }
949 if (cfgPtr->parityConfig == '0') {
950 /* We've identified two different mod counters.
951 RAID 0 can't cope with that, so we'll punt */
952 too_fatal = 1;
953 }
954 }
955
956 raidPtr->mod_counter = mod_number;
957
958 if (too_fatal) {
959 /* we've had both a serial number mismatch, and a mod_counter
960 mismatch -- and they involved two different components!!
961 Bail -- make things fail so that the user must force
962 the issue... */
963 hosed_row = -1;
964 hosed_column = -1;
965 }
966
967 if (num_ser > 2) {
968 printf("raid%d: Too many different serial numbers!\n",
969 raidPtr->raidid);
970 }
971
972 if (num_mod > 2) {
973 printf("raid%d: Too many different mod counters!\n",
974 raidPtr->raidid);
975 }
976
977 /* we start by assuming the parity will be good, and flee from
978 that notion at the slightest sign of trouble */
979
980 parity_good = RF_RAID_CLEAN;
981 for (r = 0; r < raidPtr->numRow; r++) {
982 for (c = 0; c < raidPtr->numCol; c++) {
983 dev_name = &cfgPtr->devnames[r][c][0];
984 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
985
986 if ((r == hosed_row) && (c == hosed_column)) {
987 printf("raid%d: Ignoring %s\n",
988 raidPtr->raidid, dev_name);
989 } else {
990 rf_print_label_status( raidPtr, r, c,
991 dev_name, ci_label );
992 if (rf_check_label_vitals( raidPtr, r, c,
993 dev_name, ci_label,
994 serial_number,
995 mod_number )) {
996 fatal_error = 1;
997 }
998 if (ci_label->clean != RF_RAID_CLEAN) {
999 parity_good = RF_RAID_DIRTY;
1000 }
1001 }
1002 }
1003 }
1004 if (fatal_error) {
1005 parity_good = RF_RAID_DIRTY;
1006 }
1007
1008 /* we note the state of the parity */
1009 raidPtr->parity_good = parity_good;
1010
1011 return(fatal_error);
1012 }
1013
1014 int
1015 rf_add_hot_spare(raidPtr, sparePtr)
1016 RF_Raid_t *raidPtr;
1017 RF_SingleComponent_t *sparePtr;
1018 {
1019 RF_RaidDisk_t *disks;
1020 RF_DiskQueue_t *spareQueues;
1021 int ret;
1022 unsigned int bs;
1023 int spare_number;
1024
1025 #if 0
1026 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
1027 printf("Num col: %d\n",raidPtr->numCol);
1028 #endif
1029 if (raidPtr->numSpare >= RF_MAXSPARE) {
1030 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1031 return(EINVAL);
1032 }
1033
1034 RF_LOCK_MUTEX(raidPtr->mutex);
1035
1036 /* the beginning of the spares... */
1037 disks = &raidPtr->Disks[0][raidPtr->numCol];
1038
1039 spare_number = raidPtr->numSpare;
1040
1041 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1042 &disks[spare_number], 0,
1043 raidPtr->numCol + spare_number);
1044
1045 if (ret)
1046 goto fail;
1047 if (disks[spare_number].status != rf_ds_optimal) {
1048 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1049 sparePtr->component_name);
1050 ret=EINVAL;
1051 goto fail;
1052 } else {
1053 disks[spare_number].status = rf_ds_spare;
1054 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1055 disks[spare_number].devname,
1056 (long int) disks[spare_number].numBlocks,
1057 disks[spare_number].blockSize,
1058 (long int) disks[spare_number].numBlocks *
1059 disks[spare_number].blockSize / 1024 / 1024);
1060 }
1061
1062
1063 /* check sizes and block sizes on the spare disk */
1064 bs = 1 << raidPtr->logBytesPerSector;
1065 if (disks[spare_number].blockSize != bs) {
1066 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1067 ret = EINVAL;
1068 goto fail;
1069 }
1070 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1071 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1072 disks[spare_number].devname,
1073 disks[spare_number].blockSize,
1074 (long int) raidPtr->sectorsPerDisk);
1075 ret = EINVAL;
1076 goto fail;
1077 } else {
1078 if (disks[spare_number].numBlocks >
1079 raidPtr->sectorsPerDisk) {
1080 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1081 (long int) raidPtr->sectorsPerDisk);
1082
1083 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1084 }
1085 }
1086
1087 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1088 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1089 0, raidPtr->numCol + spare_number,
1090 raidPtr->qType,
1091 raidPtr->sectorsPerDisk,
1092 raidPtr->Disks[0][raidPtr->numCol +
1093 spare_number].dev,
1094 raidPtr->maxOutstanding,
1095 &raidPtr->shutdownList,
1096 raidPtr->cleanupList);
1097
1098
1099 raidPtr->numSpare++;
1100 RF_UNLOCK_MUTEX(raidPtr->mutex);
1101 return (0);
1102
1103 fail:
1104 RF_UNLOCK_MUTEX(raidPtr->mutex);
1105 return(ret);
1106 }
1107
1108 int
1109 rf_remove_hot_spare(raidPtr,sparePtr)
1110 RF_Raid_t *raidPtr;
1111 RF_SingleComponent_t *sparePtr;
1112 {
1113 int spare_number;
1114
1115
1116 if (raidPtr->numSpare==0) {
1117 printf("No spares to remove!\n");
1118 return(EINVAL);
1119 }
1120
1121 spare_number = sparePtr->column;
1122
1123 return(EINVAL); /* XXX not implemented yet */
1124 #if 0
1125 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1126 return(EINVAL);
1127 }
1128
1129 /* verify that this spare isn't in use... */
1130
1131
1132
1133
1134 /* it's gone.. */
1135
1136 raidPtr->numSpare--;
1137
1138 return(0);
1139 #endif
1140 }
1141
1142
1143 int
1144 rf_delete_component(raidPtr,component)
1145 RF_Raid_t *raidPtr;
1146 RF_SingleComponent_t *component;
1147 {
1148 RF_RaidDisk_t *disks;
1149
1150 if ((component->row < 0) ||
1151 (component->row >= raidPtr->numRow) ||
1152 (component->column < 0) ||
1153 (component->column >= raidPtr->numCol)) {
1154 return(EINVAL);
1155 }
1156
1157 disks = &raidPtr->Disks[component->row][component->column];
1158
1159 /* 1. This component must be marked as 'failed' */
1160
1161 return(EINVAL); /* Not implemented yet. */
1162 }
1163
1164 int
1165 rf_incorporate_hot_spare(raidPtr,component)
1166 RF_Raid_t *raidPtr;
1167 RF_SingleComponent_t *component;
1168 {
1169
1170 /* Issues here include how to 'move' this in if there is IO
1171 taking place (e.g. component queues and such) */
1172
1173 return(EINVAL); /* Not implemented yet. */
1174 }
1175