rf_disks.c revision 1.43 1 /* $NetBSD: rf_disks.c,v 1.43 2003/06/28 14:21:42 darrenr Exp $ */
2 /*-
3 * Copyright (c) 1999 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1995 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Author: Mark Holland
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /***************************************************************
66 * rf_disks.c -- code to perform operations on the actual disks
67 ***************************************************************/
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.43 2003/06/28 14:21:42 darrenr Exp $");
71
72 #include <dev/raidframe/raidframevar.h>
73
74 #include "rf_raid.h"
75 #include "rf_alloclist.h"
76 #include "rf_utils.h"
77 #include "rf_general.h"
78 #include "rf_options.h"
79 #include "rf_kintf.h"
80 #include "rf_netbsd.h"
81
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/ioctl.h>
86 #include <sys/fcntl.h>
87 #include <sys/vnode.h>
88
89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
90 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
91 RF_ComponentLabel_t *);
92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
93 RF_ComponentLabel_t *, int, int );
94
95 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
96 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
97
98 /**************************************************************************
99 *
100 * initialize the disks comprising the array
101 *
102 * We want the spare disks to have regular row,col numbers so that we can
103 * easily substitute a spare for a failed disk. But, the driver code assumes
104 * throughout that the array contains numRow by numCol _non-spare_ disks, so
105 * it's not clear how to fit in the spares. This is an unfortunate holdover
106 * from raidSim. The quick and dirty fix is to make row zero bigger than the
107 * rest, and put all the spares in it. This probably needs to get changed
108 * eventually.
109 *
110 **************************************************************************/
111
112 int
113 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
114 RF_ShutdownList_t **listp;
115 RF_Raid_t *raidPtr;
116 RF_Config_t *cfgPtr;
117 {
118 RF_RaidDisk_t **disks;
119 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
120 RF_RowCol_t r, c;
121 int bs, ret;
122 unsigned i, count, foundone = 0, numFailuresThisRow;
123 int force;
124
125 force = cfgPtr->force;
126
127 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
128 if (ret)
129 goto fail;
130
131 disks = raidPtr->Disks;
132
133 for (r = 0; r < raidPtr->numRow; r++) {
134 numFailuresThisRow = 0;
135 for (c = 0; c < raidPtr->numCol; c++) {
136 ret = rf_ConfigureDisk(raidPtr,
137 &cfgPtr->devnames[r][c][0],
138 &disks[r][c], r, c);
139
140 if (ret)
141 goto fail;
142
143 if (disks[r][c].status == rf_ds_optimal) {
144 raidread_component_label(
145 raidPtr->raid_cinfo[r][c].ci_dev,
146 raidPtr->raid_cinfo[r][c].ci_vp,
147 &raidPtr->raid_cinfo[r][c].ci_label);
148 }
149
150 if (disks[r][c].status != rf_ds_optimal) {
151 numFailuresThisRow++;
152 } else {
153 if (disks[r][c].numBlocks < min_numblks)
154 min_numblks = disks[r][c].numBlocks;
155 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
156 r, c, disks[r][c].devname,
157 (long int) disks[r][c].numBlocks,
158 disks[r][c].blockSize,
159 (long int) disks[r][c].numBlocks *
160 disks[r][c].blockSize / 1024 / 1024);
161 }
162 }
163 /* XXX fix for n-fault tolerant */
164 /* XXX this should probably check to see how many failures
165 we can handle for this configuration! */
166 if (numFailuresThisRow > 0)
167 raidPtr->status[r] = rf_rs_degraded;
168 }
169
170 /* all disks must be the same size & have the same block size, bs must
171 * be a power of 2 */
172 bs = 0;
173 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
174 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
175 if (disks[r][c].status == rf_ds_optimal) {
176 bs = disks[r][c].blockSize;
177 foundone = 1;
178 }
179 }
180 }
181 if (!foundone) {
182 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
183 ret = EINVAL;
184 goto fail;
185 }
186 for (count = 0, i = 1; i; i <<= 1)
187 if (bs & i)
188 count++;
189 if (count != 1) {
190 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
191 ret = EINVAL;
192 goto fail;
193 }
194
195 if (rf_CheckLabels( raidPtr, cfgPtr )) {
196 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
197 if (force != 0) {
198 printf("raid%d: Fatal errors being ignored.\n",
199 raidPtr->raidid);
200 } else {
201 ret = EINVAL;
202 goto fail;
203 }
204 }
205
206 for (r = 0; r < raidPtr->numRow; r++) {
207 for (c = 0; c < raidPtr->numCol; c++) {
208 if (disks[r][c].status == rf_ds_optimal) {
209 if (disks[r][c].blockSize != bs) {
210 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
211 ret = EINVAL;
212 goto fail;
213 }
214 if (disks[r][c].numBlocks != min_numblks) {
215 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
216 r, c, (int) min_numblks);
217 disks[r][c].numBlocks = min_numblks;
218 }
219 }
220 }
221 }
222
223 raidPtr->sectorsPerDisk = min_numblks;
224 raidPtr->logBytesPerSector = ffs(bs) - 1;
225 raidPtr->bytesPerSector = bs;
226 raidPtr->sectorMask = bs - 1;
227 return (0);
228
229 fail:
230
231 rf_UnconfigureVnodes( raidPtr );
232
233 return (ret);
234 }
235
236
237 /****************************************************************************
238 * set up the data structures describing the spare disks in the array
239 * recall from the above comment that the spare disk descriptors are stored
240 * in row zero, which is specially expanded to hold them.
241 ****************************************************************************/
242 int
243 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
244 RF_ShutdownList_t ** listp;
245 RF_Raid_t * raidPtr;
246 RF_Config_t * cfgPtr;
247 {
248 int i, ret;
249 unsigned int bs;
250 RF_RaidDisk_t *disks;
251 int num_spares_done;
252
253 num_spares_done = 0;
254
255 /* The space for the spares should have already been allocated by
256 * ConfigureDisks() */
257
258 disks = &raidPtr->Disks[0][raidPtr->numCol];
259 for (i = 0; i < raidPtr->numSpare; i++) {
260 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
261 &disks[i], 0, raidPtr->numCol + i);
262 if (ret)
263 goto fail;
264 if (disks[i].status != rf_ds_optimal) {
265 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
266 &cfgPtr->spare_names[i][0]);
267 } else {
268 disks[i].status = rf_ds_spare; /* change status to
269 * spare */
270 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
271 disks[i].devname,
272 (long int) disks[i].numBlocks, disks[i].blockSize,
273 (long int) disks[i].numBlocks *
274 disks[i].blockSize / 1024 / 1024);
275 }
276 num_spares_done++;
277 }
278
279 /* check sizes and block sizes on spare disks */
280 bs = 1 << raidPtr->logBytesPerSector;
281 for (i = 0; i < raidPtr->numSpare; i++) {
282 if (disks[i].blockSize != bs) {
283 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
284 ret = EINVAL;
285 goto fail;
286 }
287 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
288 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
289 disks[i].devname, disks[i].blockSize,
290 (long int) raidPtr->sectorsPerDisk);
291 ret = EINVAL;
292 goto fail;
293 } else
294 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
295 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
296
297 disks[i].numBlocks = raidPtr->sectorsPerDisk;
298 }
299 }
300
301 return (0);
302
303 fail:
304
305 /* Release the hold on the main components. We've failed to allocate
306 * a spare, and since we're failing, we need to free things..
307
308 XXX failing to allocate a spare is *not* that big of a deal...
309 We *can* survive without it, if need be, esp. if we get hot
310 adding working.
311
312 If we don't fail out here, then we need a way to remove this spare...
313 that should be easier to do here than if we are "live"...
314
315 */
316
317 rf_UnconfigureVnodes( raidPtr );
318
319 return (ret);
320 }
321
322 static int
323 rf_AllocDiskStructures(raidPtr, cfgPtr)
324 RF_Raid_t *raidPtr;
325 RF_Config_t *cfgPtr;
326 {
327 RF_RaidDisk_t **disks;
328 int ret;
329 int r;
330
331 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
332 (RF_RaidDisk_t **), raidPtr->cleanupList);
333 if (disks == NULL) {
334 ret = ENOMEM;
335 goto fail;
336 }
337 raidPtr->Disks = disks;
338 /* get space for the device-specific stuff... */
339 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
340 sizeof(struct raidcinfo *), (struct raidcinfo **),
341 raidPtr->cleanupList);
342 if (raidPtr->raid_cinfo == NULL) {
343 ret = ENOMEM;
344 goto fail;
345 }
346
347 for (r = 0; r < raidPtr->numRow; r++) {
348 /* We allocate RF_MAXSPARE on the first row so that we
349 have room to do hot-swapping of spares */
350 RF_CallocAndAdd(disks[r], raidPtr->numCol
351 + ((r == 0) ? RF_MAXSPARE : 0),
352 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
353 raidPtr->cleanupList);
354 if (disks[r] == NULL) {
355 ret = ENOMEM;
356 goto fail;
357 }
358 /* get more space for device specific stuff.. */
359 RF_CallocAndAdd(raidPtr->raid_cinfo[r],
360 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
361 sizeof(struct raidcinfo), (struct raidcinfo *),
362 raidPtr->cleanupList);
363 if (raidPtr->raid_cinfo[r] == NULL) {
364 ret = ENOMEM;
365 goto fail;
366 }
367 }
368 return(0);
369 fail:
370 rf_UnconfigureVnodes( raidPtr );
371
372 return(ret);
373 }
374
375
376 /* configure a single disk during auto-configuration at boot */
377 int
378 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
379 RF_Raid_t *raidPtr;
380 RF_Config_t *cfgPtr;
381 RF_AutoConfig_t *auto_config;
382 {
383 RF_RaidDisk_t **disks;
384 RF_RaidDisk_t *diskPtr;
385 RF_RowCol_t r, c;
386 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
387 int bs, ret;
388 int numFailuresThisRow;
389 RF_AutoConfig_t *ac;
390 int parity_good;
391 int mod_counter;
392 int mod_counter_found;
393
394 #if DEBUG
395 printf("Starting autoconfiguration of RAID set...\n");
396 #endif
397
398 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
399 if (ret)
400 goto fail;
401
402 disks = raidPtr->Disks;
403
404 /* assume the parity will be fine.. */
405 parity_good = RF_RAID_CLEAN;
406
407 /* Check for mod_counters that are too low */
408 mod_counter_found = 0;
409 mod_counter = 0;
410 ac = auto_config;
411 while(ac!=NULL) {
412 if (mod_counter_found==0) {
413 mod_counter = ac->clabel->mod_counter;
414 mod_counter_found = 1;
415 } else {
416 if (ac->clabel->mod_counter > mod_counter) {
417 mod_counter = ac->clabel->mod_counter;
418 }
419 }
420 ac->flag = 0; /* clear the general purpose flag */
421 ac = ac->next;
422 }
423
424 bs = 0;
425 for (r = 0; r < raidPtr->numRow; r++) {
426 numFailuresThisRow = 0;
427 for (c = 0; c < raidPtr->numCol; c++) {
428 diskPtr = &disks[r][c];
429
430 /* find this row/col in the autoconfig */
431 #if DEBUG
432 printf("Looking for %d,%d in autoconfig\n",r,c);
433 #endif
434 ac = auto_config;
435 while(ac!=NULL) {
436 if (ac->clabel==NULL) {
437 /* big-time bad news. */
438 goto fail;
439 }
440 if ((ac->clabel->row == r) &&
441 (ac->clabel->column == c) &&
442 (ac->clabel->mod_counter == mod_counter)) {
443 /* it's this one... */
444 /* flag it as 'used', so we don't
445 free it later. */
446 ac->flag = 1;
447 #if DEBUG
448 printf("Found: %s at %d,%d\n",
449 ac->devname,r,c);
450 #endif
451
452 break;
453 }
454 ac=ac->next;
455 }
456
457 if (ac==NULL) {
458 /* we didn't find an exact match with a
459 correct mod_counter above... can we
460 find one with an incorrect mod_counter
461 to use instead? (this one, if we find
462 it, will be marked as failed once the
463 set configures)
464 */
465
466 ac = auto_config;
467 while(ac!=NULL) {
468 if (ac->clabel==NULL) {
469 /* big-time bad news. */
470 goto fail;
471 }
472 if ((ac->clabel->row == r) &&
473 (ac->clabel->column == c)) {
474 /* it's this one...
475 flag it as 'used', so we
476 don't free it later. */
477 ac->flag = 1;
478 #if DEBUG
479 printf("Found(low mod_counter): %s at %d,%d\n",
480 ac->devname,r,c);
481 #endif
482
483 break;
484 }
485 ac=ac->next;
486 }
487 }
488
489
490
491 if (ac!=NULL) {
492 /* Found it. Configure it.. */
493 diskPtr->blockSize = ac->clabel->blockSize;
494 diskPtr->numBlocks = ac->clabel->numBlocks;
495 /* Note: rf_protectedSectors is already
496 factored into numBlocks here */
497 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
498 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
499
500 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
501 ac->clabel, sizeof(*ac->clabel));
502 sprintf(diskPtr->devname, "/dev/%s",
503 ac->devname);
504
505 /* note the fact that this component was
506 autoconfigured. You'll need this info
507 later. Trust me :) */
508 diskPtr->auto_configured = 1;
509 diskPtr->dev = ac->dev;
510
511 /*
512 * we allow the user to specify that
513 * only a fraction of the disks should
514 * be used this is just for debug: it
515 * speeds up the parity scan
516 */
517
518 diskPtr->numBlocks = diskPtr->numBlocks *
519 rf_sizePercentage / 100;
520
521 /* XXX these will get set multiple times,
522 but since we're autoconfiguring, they'd
523 better be always the same each time!
524 If not, this is the least of your worries */
525
526 bs = diskPtr->blockSize;
527 min_numblks = diskPtr->numBlocks;
528
529 /* this gets done multiple times, but that's
530 fine -- the serial number will be the same
531 for all components, guaranteed */
532 raidPtr->serial_number =
533 ac->clabel->serial_number;
534 /* check the last time the label
535 was modified */
536 if (ac->clabel->mod_counter !=
537 mod_counter) {
538 /* Even though we've filled in all
539 of the above, we don't trust
540 this component since it's
541 modification counter is not
542 in sync with the rest, and we really
543 consider it to be failed. */
544 disks[r][c].status = rf_ds_failed;
545 numFailuresThisRow++;
546 } else {
547 if (ac->clabel->clean !=
548 RF_RAID_CLEAN) {
549 parity_good = RF_RAID_DIRTY;
550 }
551 }
552 } else {
553 /* Didn't find it at all!!
554 Component must really be dead */
555 disks[r][c].status = rf_ds_failed;
556 sprintf(disks[r][c].devname,"component%d",
557 r * raidPtr->numCol + c);
558 numFailuresThisRow++;
559 }
560 }
561 /* XXX fix for n-fault tolerant */
562 /* XXX this should probably check to see how many failures
563 we can handle for this configuration! */
564 if (numFailuresThisRow > 0)
565 raidPtr->status[r] = rf_rs_degraded;
566 }
567
568 /* close the device for the ones that didn't get used */
569
570 ac = auto_config;
571 while(ac!=NULL) {
572 if (ac->flag == 0) {
573 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
574 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
575 vput(ac->vp);
576 ac->vp = NULL;
577 #if DEBUG
578 printf("Released %s from auto-config set.\n",
579 ac->devname);
580 #endif
581 }
582 ac = ac->next;
583 }
584
585 raidPtr->mod_counter = mod_counter;
586
587 /* note the state of the parity, if any */
588 raidPtr->parity_good = parity_good;
589 raidPtr->sectorsPerDisk = min_numblks;
590 raidPtr->logBytesPerSector = ffs(bs) - 1;
591 raidPtr->bytesPerSector = bs;
592 raidPtr->sectorMask = bs - 1;
593 return (0);
594
595 fail:
596
597 rf_UnconfigureVnodes( raidPtr );
598
599 return (ret);
600
601 }
602
603 /* configure a single disk in the array */
604 int
605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
606 RF_Raid_t *raidPtr;
607 char *buf;
608 RF_RaidDisk_t *diskPtr;
609 RF_RowCol_t row;
610 RF_RowCol_t col;
611 {
612 char *p;
613 struct partinfo dpart;
614 struct vnode *vp;
615 struct vattr va;
616 struct lwp *l;
617 int error;
618
619 p = rf_find_non_white(buf);
620 if (p[strlen(p) - 1] == '\n') {
621 /* strip off the newline */
622 p[strlen(p) - 1] = '\0';
623 }
624 (void) strcpy(diskPtr->devname, p);
625
626 l = LIST_FIRST(&raidPtr->engine_thread->p_lwps);
627
628 /* Let's start by claiming the component is fine and well... */
629 diskPtr->status = rf_ds_optimal;
630
631 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
632 raidPtr->raid_cinfo[row][col].ci_dev = NULL;
633
634 error = raidlookup(diskPtr->devname, l, &vp);
635 if (error) {
636 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
637 if (error == ENXIO) {
638 /* the component isn't there... must be dead :-( */
639 diskPtr->status = rf_ds_failed;
640 } else {
641 return (error);
642 }
643 }
644 if (diskPtr->status == rf_ds_optimal) {
645
646 if ((error = VOP_GETATTR(vp, &va,
647 l->l_proc->p_ucred, l)) != 0) {
648 return (error);
649 }
650 error = VOP_IOCTL(vp, DIOCGPART, &dpart,
651 FREAD, l->l_proc->p_ucred, l);
652 if (error) {
653 return (error);
654 }
655
656 diskPtr->blockSize = dpart.disklab->d_secsize;
657
658 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
659 diskPtr->partitionSize = dpart.part->p_size;
660
661 raidPtr->raid_cinfo[row][col].ci_vp = vp;
662 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
663
664 /* This component was not automatically configured */
665 diskPtr->auto_configured = 0;
666 diskPtr->dev = va.va_rdev;
667
668 /* we allow the user to specify that only a fraction of the
669 * disks should be used this is just for debug: it speeds up
670 * the parity scan */
671 diskPtr->numBlocks = diskPtr->numBlocks *
672 rf_sizePercentage / 100;
673 }
674 return (0);
675 }
676
677 static void
678 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
679 RF_Raid_t *raidPtr;
680 int row;
681 int column;
682 char *dev_name;
683 RF_ComponentLabel_t *ci_label;
684 {
685
686 printf("raid%d: Component %s being configured at row: %d col: %d\n",
687 raidPtr->raidid, dev_name, row, column );
688 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
689 ci_label->row, ci_label->column,
690 ci_label->num_rows, ci_label->num_columns);
691 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
692 ci_label->version, ci_label->serial_number,
693 ci_label->mod_counter);
694 printf(" Clean: %s Status: %d\n",
695 ci_label->clean ? "Yes" : "No", ci_label->status );
696 }
697
698 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
699 serial_number, mod_counter )
700 RF_Raid_t *raidPtr;
701 int row;
702 int column;
703 char *dev_name;
704 RF_ComponentLabel_t *ci_label;
705 int serial_number;
706 int mod_counter;
707 {
708 int fatal_error = 0;
709
710 if (serial_number != ci_label->serial_number) {
711 printf("%s has a different serial number: %d %d\n",
712 dev_name, serial_number, ci_label->serial_number);
713 fatal_error = 1;
714 }
715 if (mod_counter != ci_label->mod_counter) {
716 printf("%s has a different modfication count: %d %d\n",
717 dev_name, mod_counter, ci_label->mod_counter);
718 }
719
720 if (row != ci_label->row) {
721 printf("Row out of alignment for: %s\n", dev_name);
722 fatal_error = 1;
723 }
724 if (column != ci_label->column) {
725 printf("Column out of alignment for: %s\n", dev_name);
726 fatal_error = 1;
727 }
728 if (raidPtr->numRow != ci_label->num_rows) {
729 printf("Number of rows do not match for: %s\n", dev_name);
730 fatal_error = 1;
731 }
732 if (raidPtr->numCol != ci_label->num_columns) {
733 printf("Number of columns do not match for: %s\n", dev_name);
734 fatal_error = 1;
735 }
736 if (ci_label->clean == 0) {
737 /* it's not clean, but that's not fatal */
738 printf("%s is not clean!\n", dev_name);
739 }
740 return(fatal_error);
741 }
742
743
744 /*
745
746 rf_CheckLabels() - check all the component labels for consistency.
747 Return an error if there is anything major amiss.
748
749 */
750
751 int
752 rf_CheckLabels( raidPtr, cfgPtr )
753 RF_Raid_t *raidPtr;
754 RF_Config_t *cfgPtr;
755 {
756 int r,c;
757 char *dev_name;
758 RF_ComponentLabel_t *ci_label;
759 int serial_number = 0;
760 int mod_number = 0;
761 int fatal_error = 0;
762 int mod_values[4];
763 int mod_count[4];
764 int ser_values[4];
765 int ser_count[4];
766 int num_ser;
767 int num_mod;
768 int i;
769 int found;
770 int hosed_row;
771 int hosed_column;
772 int too_fatal;
773 int parity_good;
774 int force;
775
776 hosed_row = -1;
777 hosed_column = -1;
778 too_fatal = 0;
779 force = cfgPtr->force;
780
781 /*
782 We're going to try to be a little intelligent here. If one
783 component's label is bogus, and we can identify that it's the
784 *only* one that's gone, we'll mark it as "failed" and allow
785 the configuration to proceed. This will be the *only* case
786 that we'll proceed if there would be (otherwise) fatal errors.
787
788 Basically we simply keep a count of how many components had
789 what serial number. If all but one agree, we simply mark
790 the disagreeing component as being failed, and allow
791 things to come up "normally".
792
793 We do this first for serial numbers, and then for "mod_counter".
794
795 */
796
797 num_ser = 0;
798 num_mod = 0;
799 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
800 for (c = 0; c < raidPtr->numCol; c++) {
801 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
802 found=0;
803 for(i=0;i<num_ser;i++) {
804 if (ser_values[i] == ci_label->serial_number) {
805 ser_count[i]++;
806 found=1;
807 break;
808 }
809 }
810 if (!found) {
811 ser_values[num_ser] = ci_label->serial_number;
812 ser_count[num_ser] = 1;
813 num_ser++;
814 if (num_ser>2) {
815 fatal_error = 1;
816 break;
817 }
818 }
819 found=0;
820 for(i=0;i<num_mod;i++) {
821 if (mod_values[i] == ci_label->mod_counter) {
822 mod_count[i]++;
823 found=1;
824 break;
825 }
826 }
827 if (!found) {
828 mod_values[num_mod] = ci_label->mod_counter;
829 mod_count[num_mod] = 1;
830 num_mod++;
831 if (num_mod>2) {
832 fatal_error = 1;
833 break;
834 }
835 }
836 }
837 }
838 #if DEBUG
839 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
840 for(i=0;i<num_ser;i++) {
841 printf("%d %d\n", ser_values[i], ser_count[i]);
842 }
843 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
844 for(i=0;i<num_mod;i++) {
845 printf("%d %d\n", mod_values[i], mod_count[i]);
846 }
847 #endif
848 serial_number = ser_values[0];
849 if (num_ser == 2) {
850 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
851 /* Locate the maverick component */
852 if (ser_count[1] > ser_count[0]) {
853 serial_number = ser_values[1];
854 }
855 for (r = 0; r < raidPtr->numRow; r++) {
856 for (c = 0; c < raidPtr->numCol; c++) {
857 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
858 if (serial_number !=
859 ci_label->serial_number) {
860 hosed_row = r;
861 hosed_column = c;
862 break;
863 }
864 }
865 }
866 printf("Hosed component: %s\n",
867 &cfgPtr->devnames[hosed_row][hosed_column][0]);
868 if (!force) {
869 /* we'll fail this component, as if there are
870 other major errors, we arn't forcing things
871 and we'll abort the config anyways */
872 raidPtr->Disks[hosed_row][hosed_column].status
873 = rf_ds_failed;
874 raidPtr->numFailures++;
875 raidPtr->status[hosed_row] = rf_rs_degraded;
876 }
877 } else {
878 too_fatal = 1;
879 }
880 if (cfgPtr->parityConfig == '0') {
881 /* We've identified two different serial numbers.
882 RAID 0 can't cope with that, so we'll punt */
883 too_fatal = 1;
884 }
885
886 }
887
888 /* record the serial number for later. If we bail later, setting
889 this doesn't matter, otherwise we've got the best guess at the
890 correct serial number */
891 raidPtr->serial_number = serial_number;
892
893 mod_number = mod_values[0];
894 if (num_mod == 2) {
895 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
896 /* Locate the maverick component */
897 if (mod_count[1] > mod_count[0]) {
898 mod_number = mod_values[1];
899 } else if (mod_count[1] < mod_count[0]) {
900 mod_number = mod_values[0];
901 } else {
902 /* counts of different modification values
903 are the same. Assume greater value is
904 the correct one, all other things
905 considered */
906 if (mod_values[0] > mod_values[1]) {
907 mod_number = mod_values[0];
908 } else {
909 mod_number = mod_values[1];
910 }
911
912 }
913 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
914 for (c = 0; c < raidPtr->numCol; c++) {
915 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
916 if (mod_number !=
917 ci_label->mod_counter) {
918 if ( ( hosed_row == r ) &&
919 ( hosed_column == c )) {
920 /* same one. Can
921 deal with it. */
922 } else {
923 hosed_row = r;
924 hosed_column = c;
925 if (num_ser != 1) {
926 too_fatal = 1;
927 break;
928 }
929 }
930 }
931 }
932 }
933 printf("Hosed component: %s\n",
934 &cfgPtr->devnames[hosed_row][hosed_column][0]);
935 if (!force) {
936 /* we'll fail this component, as if there are
937 other major errors, we arn't forcing things
938 and we'll abort the config anyways */
939 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
940 raidPtr->Disks[hosed_row][hosed_column].status
941 = rf_ds_failed;
942 raidPtr->numFailures++;
943 raidPtr->status[hosed_row] = rf_rs_degraded;
944 }
945 }
946 } else {
947 too_fatal = 1;
948 }
949 if (cfgPtr->parityConfig == '0') {
950 /* We've identified two different mod counters.
951 RAID 0 can't cope with that, so we'll punt */
952 too_fatal = 1;
953 }
954 }
955
956 raidPtr->mod_counter = mod_number;
957
958 if (too_fatal) {
959 /* we've had both a serial number mismatch, and a mod_counter
960 mismatch -- and they involved two different components!!
961 Bail -- make things fail so that the user must force
962 the issue... */
963 hosed_row = -1;
964 hosed_column = -1;
965 }
966
967 if (num_ser > 2) {
968 printf("raid%d: Too many different serial numbers!\n",
969 raidPtr->raidid);
970 }
971
972 if (num_mod > 2) {
973 printf("raid%d: Too many different mod counters!\n",
974 raidPtr->raidid);
975 }
976
977 /* we start by assuming the parity will be good, and flee from
978 that notion at the slightest sign of trouble */
979
980 parity_good = RF_RAID_CLEAN;
981 for (r = 0; r < raidPtr->numRow; r++) {
982 for (c = 0; c < raidPtr->numCol; c++) {
983 dev_name = &cfgPtr->devnames[r][c][0];
984 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
985
986 if ((r == hosed_row) && (c == hosed_column)) {
987 printf("raid%d: Ignoring %s\n",
988 raidPtr->raidid, dev_name);
989 } else {
990 rf_print_label_status( raidPtr, r, c,
991 dev_name, ci_label );
992 if (rf_check_label_vitals( raidPtr, r, c,
993 dev_name, ci_label,
994 serial_number,
995 mod_number )) {
996 fatal_error = 1;
997 }
998 if (ci_label->clean != RF_RAID_CLEAN) {
999 parity_good = RF_RAID_DIRTY;
1000 }
1001 }
1002 }
1003 }
1004 if (fatal_error) {
1005 parity_good = RF_RAID_DIRTY;
1006 }
1007
1008 /* we note the state of the parity */
1009 raidPtr->parity_good = parity_good;
1010
1011 return(fatal_error);
1012 }
1013
1014 int
1015 rf_add_hot_spare(raidPtr, sparePtr)
1016 RF_Raid_t *raidPtr;
1017 RF_SingleComponent_t *sparePtr;
1018 {
1019 RF_RaidDisk_t *disks;
1020 RF_DiskQueue_t *spareQueues;
1021 int ret;
1022 unsigned int bs;
1023 int spare_number;
1024
1025 ret=0;
1026
1027 if (raidPtr->numSpare >= RF_MAXSPARE) {
1028 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
1029 return(EINVAL);
1030 }
1031
1032 RF_LOCK_MUTEX(raidPtr->mutex);
1033 while (raidPtr->adding_hot_spare==1) {
1034 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
1035 &(raidPtr->mutex));
1036 }
1037 raidPtr->adding_hot_spare=1;
1038 RF_UNLOCK_MUTEX(raidPtr->mutex);
1039
1040 /* the beginning of the spares... */
1041 disks = &raidPtr->Disks[0][raidPtr->numCol];
1042
1043 spare_number = raidPtr->numSpare;
1044
1045 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1046 &disks[spare_number], 0,
1047 raidPtr->numCol + spare_number);
1048
1049 if (ret)
1050 goto fail;
1051 if (disks[spare_number].status != rf_ds_optimal) {
1052 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1053 sparePtr->component_name);
1054 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1055 ret=EINVAL;
1056 goto fail;
1057 } else {
1058 disks[spare_number].status = rf_ds_spare;
1059 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1060 disks[spare_number].devname,
1061 (long int) disks[spare_number].numBlocks,
1062 disks[spare_number].blockSize,
1063 (long int) disks[spare_number].numBlocks *
1064 disks[spare_number].blockSize / 1024 / 1024);
1065 }
1066
1067
1068 /* check sizes and block sizes on the spare disk */
1069 bs = 1 << raidPtr->logBytesPerSector;
1070 if (disks[spare_number].blockSize != bs) {
1071 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1072 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1073 ret = EINVAL;
1074 goto fail;
1075 }
1076 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1077 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1078 disks[spare_number].devname,
1079 disks[spare_number].blockSize,
1080 (long int) raidPtr->sectorsPerDisk);
1081 rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
1082 ret = EINVAL;
1083 goto fail;
1084 } else {
1085 if (disks[spare_number].numBlocks >
1086 raidPtr->sectorsPerDisk) {
1087 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1088 (long int) raidPtr->sectorsPerDisk);
1089
1090 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1091 }
1092 }
1093
1094 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1095 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1096 0, raidPtr->numCol + spare_number,
1097 raidPtr->qType,
1098 raidPtr->sectorsPerDisk,
1099 raidPtr->Disks[0][raidPtr->numCol +
1100 spare_number].dev,
1101 raidPtr->maxOutstanding,
1102 &raidPtr->shutdownList,
1103 raidPtr->cleanupList);
1104
1105 RF_LOCK_MUTEX(raidPtr->mutex);
1106 raidPtr->numSpare++;
1107 RF_UNLOCK_MUTEX(raidPtr->mutex);
1108
1109 fail:
1110 RF_LOCK_MUTEX(raidPtr->mutex);
1111 raidPtr->adding_hot_spare=0;
1112 wakeup(&(raidPtr->adding_hot_spare));
1113 RF_UNLOCK_MUTEX(raidPtr->mutex);
1114
1115 return(ret);
1116 }
1117
1118 int
1119 rf_remove_hot_spare(raidPtr,sparePtr)
1120 RF_Raid_t *raidPtr;
1121 RF_SingleComponent_t *sparePtr;
1122 {
1123 int spare_number;
1124
1125
1126 if (raidPtr->numSpare==0) {
1127 printf("No spares to remove!\n");
1128 return(EINVAL);
1129 }
1130
1131 spare_number = sparePtr->column;
1132
1133 return(EINVAL); /* XXX not implemented yet */
1134 #if 0
1135 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1136 return(EINVAL);
1137 }
1138
1139 /* verify that this spare isn't in use... */
1140
1141
1142
1143
1144 /* it's gone.. */
1145
1146 raidPtr->numSpare--;
1147
1148 return(0);
1149 #endif
1150 }
1151
1152
1153 int
1154 rf_delete_component(raidPtr,component)
1155 RF_Raid_t *raidPtr;
1156 RF_SingleComponent_t *component;
1157 {
1158 RF_RaidDisk_t *disks;
1159
1160 if ((component->row < 0) ||
1161 (component->row >= raidPtr->numRow) ||
1162 (component->column < 0) ||
1163 (component->column >= raidPtr->numCol)) {
1164 return(EINVAL);
1165 }
1166
1167 disks = &raidPtr->Disks[component->row][component->column];
1168
1169 /* 1. This component must be marked as 'failed' */
1170
1171 return(EINVAL); /* Not implemented yet. */
1172 }
1173
1174 int
1175 rf_incorporate_hot_spare(raidPtr,component)
1176 RF_Raid_t *raidPtr;
1177 RF_SingleComponent_t *component;
1178 {
1179
1180 /* Issues here include how to 'move' this in if there is IO
1181 taking place (e.g. component queues and such) */
1182
1183 return(EINVAL); /* Not implemented yet. */
1184 }
1185