/* $NetBSD: rf_netbsdkintf.c,v 1.295.6.2 2012/08/13 19:41:29 riz Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.2 2012/08/13 19:41:29 riz Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #ifdef DEBUG
156 int rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else /* DEBUG */
159 #define db1_printf(a) { }
160 #endif /* DEBUG */
161
162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 raidopen, raidclose, raidstrategy, raidioctl,
211 raiddump, raidsize, D_DISK
212 };
213
214 const struct cdevsw raid_cdevsw = {
215 raidopen, raidclose, raidread, raidwrite, raidioctl,
216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218
219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220
221 /* XXX Not sure if the following should be replacing the raidPtrs above,
222 or if it should be used in conjunction with that...
223 */
224
/*
 * Per-unit driver state ("softc") for a raid(4) pseudo-device.
 * One is allocated per unit in raidattach(); the RAIDframe core
 * state for the same unit lives in the parallel raidPtrs[] array.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
234 /* sc_flags */
235 #define RAIDF_INITED 0x01 /* unit has been initialized */
236 #define RAIDF_WLABEL 0x02 /* label area is writable */
237 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
238 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED 0x80 /* unit is locked */
241
242 #define raidunit(x) DISKUNIT(x)
243 int numraid = 0;
244
245 extern struct cfdriver raid_cd;
246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248 DVF_DETACH_SHUTDOWN);
249
250 /*
251 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252 * Be aware that large numbers can allow the driver to consume a lot of
253 * kernel memory, especially on writes, and in degraded mode reads.
254 *
255 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256 * a single 64K write will typically require 64K for the old data,
257 * 64K for the old parity, and 64K for the new parity, for a total
258 * of 192K (if the parity buffer is not re-used immediately).
259 * Even it if is used immediately, that's still 128K, which when multiplied
260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261 *
262 * Now in degraded mode, for example, a 64K read on the above setup may
263 * require data reconstruction, which will require *all* of the 4 remaining
264 * disks to participate -- 4 * 32K/disk == 128K again.
265 */
266
267 #ifndef RAIDOUTSTANDING
268 #define RAIDOUTSTANDING 6
269 #endif
270
271 #define RAIDLABELDEV(dev) \
272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276
277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278 struct disklabel *);
279 static void raidgetdisklabel(dev_t);
280 static void raidmakedisklabel(struct raid_softc *);
281
282 static int raidlock(struct raid_softc *);
283 static void raidunlock(struct raid_softc *);
284
285 static int raid_detach_unlocked(struct raid_softc *);
286
287 static void rf_markalldirty(RF_Raid_t *);
288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289
290 void rf_ReconThread(struct rf_recon_req *);
291 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292 void rf_CopybackThread(RF_Raid_t *raidPtr);
293 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294 int rf_autoconfig(device_t);
295 void rf_buildroothack(RF_ConfigSet_t *);
296
297 RF_AutoConfig_t *rf_find_raid_components(void);
298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302 int rf_set_autoconfig(RF_Raid_t *, int);
303 int rf_set_rootpartition(RF_Raid_t *, int);
304 void rf_release_all_vps(RF_ConfigSet_t *);
305 void rf_cleanup_config_set(RF_ConfigSet_t *);
306 int rf_have_enough_components(RF_ConfigSet_t *);
307 int rf_auto_config_set(RF_ConfigSet_t *, int *);
308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309
310 /*
311 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
312 * Note that this is overridden by having RAID_AUTOCONFIG as an option
313 * in the kernel config file.
314 */
315 #ifdef RAID_AUTOCONFIG
316 int raidautoconfig = 1;
317 #else
318 int raidautoconfig = 0;
319 #endif
320 static bool raidautoconfigdone = false;
321
322 struct RF_Pools_s rf_pools;
323
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units ("num") compiled into the kernel.  Sets up
 * all global driver state: the raidPtrs[] descriptor array, the
 * per-unit raid_softc[] array and per-unit buffer queues, the
 * RAIDframe core (rf_BootRaidframe), the autoconf attachment, and a
 * config finalizer so that component auto-configuration runs only
 * after all real hardware has been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* trim numraid so later loops never touch the
			   unallocated tail of raidPtrs[] */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
403
/*
 * rf_autoconfig: config_finalize(9) hook.  Runs at most once, after
 * all real hardware has attached: scans disks for RAID components,
 * groups them into configuration sets, and configures the eligible
 * sets via rf_buildroothack().  Returns 1 when it did work (so the
 * finalize pass is re-run), 0 when there was nothing to do.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
431
/*
 * rf_buildroothack: walk the list of auto-config sets, configuring the
 * complete ones that have autoconfigure set, then try to work out
 * which (if any) configured set holds the root file system.  If
 * exactly one set is rootable it becomes booted_device; if several
 * qualify, the set containing the device we actually booted from is
 * chosen; failing that the user is asked (RB_ASKNAME).  Every set is
 * cleaned up (rf_cleanup_config_set) whether or not it configured.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* remember the link now; cset is freed below */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* recount: keep only the rootable sets that actually
		   contain the device we booted from */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the "/dev/" prefix -- assumes every
				   component devname starts with it */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
				    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				    raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
526
527
528 int
529 raidsize(dev_t dev)
530 {
531 struct raid_softc *rs;
532 struct disklabel *lp;
533 int part, unit, omask, size;
534
535 unit = raidunit(dev);
536 if (unit >= numraid)
537 return (-1);
538 rs = &raid_softc[unit];
539
540 if ((rs->sc_flags & RAIDF_INITED) == 0)
541 return (-1);
542
543 part = DISKPART(dev);
544 omask = rs->sc_dkdev.dk_openmask & (1 << part);
545 lp = rs->sc_dkdev.dk_label;
546
547 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
548 return (-1);
549
550 if (lp->d_partitions[part].p_fstype != FS_SWAP)
551 size = -1;
552 else
553 size = lp->d_partitions[part].p_size *
554 (lp->d_secsize / DEV_BSIZE);
555
556 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
557 return (-1);
558
559 return (size);
560
561 }
562
/*
 * raiddump: kernel crash-dump entry point (bdevsw d_dump).  Only
 * RAID 1 sets (exactly one data and one parity column) are supported,
 * since a dump must go to a single underlying component.  Selects a
 * live component -- preferring the master, then a spare that replaced
 * the master, then the slave, then a spare that replaced the slave --
 * and forwards the dump to that component's block driver at the
 * corresponding on-disk offset.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* the dump must be a whole number of DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the raid device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column this
			   spare is standing in for. */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	/* NOTE(review): bdevsw_lookup() may return NULL for a bogus
	   component dev_t; the result is dereferenced below without a
	   check -- confirm components always have a valid bdevsw. */
	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/*
 * raidopen: open entry point for both the block and character
 * devices.  Under the unit lock: refuses units being shut down,
 * (re)reads the disklabel on the first open of a configured set,
 * validates the requested partition, and records the open in the
 * per-format (char/block) open masks.  On the first open of a
 * configured set the component labels are marked dirty so that an
 * unclean shutdown can later be detected.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured set: (re)read the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	/* NB: the success path deliberately falls through to "bad";
	   it just drops the unit lock and returns error (0). */
bad:
	raidunlock(rs);

	return (error);


}
/*
 * raidclose: close entry point for both the block and character
 * devices.  Under the unit lock, clears this partition's bit from the
 * appropriate open mask; when the last partition of a configured set
 * closes, a final "clean" update is pushed out to the component
 * labels.  Always returns 0 once the lock is obtained.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
836
837 void
838 raidstrategy(struct buf *bp)
839 {
840 unsigned int raidID = raidunit(bp->b_dev);
841 RF_Raid_t *raidPtr;
842 struct raid_softc *rs = &raid_softc[raidID];
843 int wlabel;
844
845 if ((rs->sc_flags & RAIDF_INITED) ==0) {
846 bp->b_error = ENXIO;
847 goto done;
848 }
849 if (raidID >= numraid || !raidPtrs[raidID]) {
850 bp->b_error = ENODEV;
851 goto done;
852 }
853 raidPtr = raidPtrs[raidID];
854 if (!raidPtr->valid) {
855 bp->b_error = ENODEV;
856 goto done;
857 }
858 if (bp->b_bcount == 0) {
859 db1_printf(("b_bcount is zero..\n"));
860 goto done;
861 }
862
863 /*
864 * Do bounds checking and adjust transfer. If there's an
865 * error, the bounds check will flag that for us.
866 */
867
868 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
869 if (DISKPART(bp->b_dev) == RAW_PART) {
870 uint64_t size; /* device size in DEV_BSIZE unit */
871
872 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
873 size = raidPtr->totalSectors <<
874 (raidPtr->logBytesPerSector - DEV_BSHIFT);
875 } else {
876 size = raidPtr->totalSectors >>
877 (DEV_BSHIFT - raidPtr->logBytesPerSector);
878 }
879 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
880 goto done;
881 }
882 } else {
883 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
884 db1_printf(("Bounds check failed!!:%d %d\n",
885 (int) bp->b_blkno, (int) wlabel));
886 goto done;
887 }
888 }
889
890 rf_lock_mutex2(raidPtr->iodone_lock);
891
892 bp->b_resid = 0;
893
894 /* stuff it onto our queue */
895 bufq_put(rs->buf_queue, bp);
896
897 /* scheduled the IO to happen at the next convenient time */
898 rf_signal_cond2(raidPtr->iodone_cv);
899 rf_unlock_mutex2(raidPtr->iodone_lock);
900
901 return;
902
903 done:
904 bp->b_resid = bp->b_bcount;
905 biodone(bp);
906 }
907 /* ARGSUSED */
908 int
909 raidread(dev_t dev, struct uio *uio, int flags)
910 {
911 int unit = raidunit(dev);
912 struct raid_softc *rs;
913
914 if (unit >= numraid)
915 return (ENXIO);
916 rs = &raid_softc[unit];
917
918 if ((rs->sc_flags & RAIDF_INITED) == 0)
919 return (ENXIO);
920
921 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
922
923 }
924 /* ARGSUSED */
925 int
926 raidwrite(dev_t dev, struct uio *uio, int flags)
927 {
928 int unit = raidunit(dev);
929 struct raid_softc *rs;
930
931 if (unit >= numraid)
932 return (ENXIO);
933 rs = &raid_softc[unit];
934
935 if ((rs->sc_flags & RAIDF_INITED) == 0)
936 return (ENXIO);
937
938 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
939
940 }
941
/*
 * raid_detach_unlocked: do the real work of detaching a unit.  The
 * "_unlocked" suffix suggests the caller already holds the unit lock
 * (raidlock) -- NOTE(review): the callers are outside this chunk;
 * confirm.  Fails with EBUSY while any partition is open; otherwise
 * shuts down the RAIDframe engine (if the unit was configured) and
 * tears down the disk(9) attachment.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
973
974 int
975 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
976 {
977 int unit = raidunit(dev);
978 int error = 0;
979 int part, pmask, s;
980 cfdata_t cf;
981 struct raid_softc *rs;
982 RF_Config_t *k_cfg, *u_cfg;
983 RF_Raid_t *raidPtr;
984 RF_RaidDisk_t *diskPtr;
985 RF_AccTotals_t *totals;
986 RF_DeviceConfig_t *d_cfg, **ucfgp;
987 u_char *specific_buf;
988 int retcode = 0;
989 int column;
990 /* int raidid; */
991 struct rf_recon_req *rrcopy, *rr;
992 RF_ComponentLabel_t *clabel;
993 RF_ComponentLabel_t *ci_label;
994 RF_ComponentLabel_t **clabel_ptr;
995 RF_SingleComponent_t *sparePtr,*componentPtr;
996 RF_SingleComponent_t component;
997 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
998 int i, j, d;
999 #ifdef __HAVE_OLD_DISKLABEL
1000 struct disklabel newlabel;
1001 #endif
1002 struct dkwedge_info *dkw;
1003
1004 if (unit >= numraid)
1005 return (ENXIO);
1006 rs = &raid_softc[unit];
1007 raidPtr = raidPtrs[unit];
1008
1009 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1010 (int) DISKPART(dev), (int) unit, cmd));
1011
1012 /* Must be open for writes for these commands... */
1013 switch (cmd) {
1014 #ifdef DIOCGSECTORSIZE
1015 case DIOCGSECTORSIZE:
1016 *(u_int *)data = raidPtr->bytesPerSector;
1017 return 0;
1018 case DIOCGMEDIASIZE:
1019 *(off_t *)data =
1020 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1021 return 0;
1022 #endif
1023 case DIOCSDINFO:
1024 case DIOCWDINFO:
1025 #ifdef __HAVE_OLD_DISKLABEL
1026 case ODIOCWDINFO:
1027 case ODIOCSDINFO:
1028 #endif
1029 case DIOCWLABEL:
1030 case DIOCAWEDGE:
1031 case DIOCDWEDGE:
1032 case DIOCSSTRATEGY:
1033 if ((flag & FWRITE) == 0)
1034 return (EBADF);
1035 }
1036
1037 /* Must be initialized for these... */
1038 switch (cmd) {
1039 case DIOCGDINFO:
1040 case DIOCSDINFO:
1041 case DIOCWDINFO:
1042 #ifdef __HAVE_OLD_DISKLABEL
1043 case ODIOCGDINFO:
1044 case ODIOCWDINFO:
1045 case ODIOCSDINFO:
1046 case ODIOCGDEFLABEL:
1047 #endif
1048 case DIOCGPART:
1049 case DIOCWLABEL:
1050 case DIOCGDEFLABEL:
1051 case DIOCAWEDGE:
1052 case DIOCDWEDGE:
1053 case DIOCLWEDGES:
1054 case DIOCCACHESYNC:
1055 case RAIDFRAME_SHUTDOWN:
1056 case RAIDFRAME_REWRITEPARITY:
1057 case RAIDFRAME_GET_INFO:
1058 case RAIDFRAME_RESET_ACCTOTALS:
1059 case RAIDFRAME_GET_ACCTOTALS:
1060 case RAIDFRAME_KEEP_ACCTOTALS:
1061 case RAIDFRAME_GET_SIZE:
1062 case RAIDFRAME_FAIL_DISK:
1063 case RAIDFRAME_COPYBACK:
1064 case RAIDFRAME_CHECK_RECON_STATUS:
1065 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1066 case RAIDFRAME_GET_COMPONENT_LABEL:
1067 case RAIDFRAME_SET_COMPONENT_LABEL:
1068 case RAIDFRAME_ADD_HOT_SPARE:
1069 case RAIDFRAME_REMOVE_HOT_SPARE:
1070 case RAIDFRAME_INIT_LABELS:
1071 case RAIDFRAME_REBUILD_IN_PLACE:
1072 case RAIDFRAME_CHECK_PARITY:
1073 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1074 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1075 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1076 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1077 case RAIDFRAME_SET_AUTOCONFIG:
1078 case RAIDFRAME_SET_ROOT:
1079 case RAIDFRAME_DELETE_COMPONENT:
1080 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1081 case RAIDFRAME_PARITYMAP_STATUS:
1082 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1083 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1084 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1085 case DIOCGSTRATEGY:
1086 case DIOCSSTRATEGY:
1087 if ((rs->sc_flags & RAIDF_INITED) == 0)
1088 return (ENXIO);
1089 }
1090
1091 switch (cmd) {
1092 #ifdef COMPAT_50
1093 case RAIDFRAME_GET_INFO50:
1094 return rf_get_info50(raidPtr, data);
1095
1096 case RAIDFRAME_CONFIGURE50:
1097 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1098 return retcode;
1099 goto config;
1100 #endif
1101 /* configure the system */
1102 case RAIDFRAME_CONFIGURE:
1103
1104 if (raidPtr->valid) {
1105 /* There is a valid RAID set running on this unit! */
1106 printf("raid%d: Device already configured!\n",unit);
1107 return(EINVAL);
1108 }
1109
1110 /* copy-in the configuration information */
1111 /* data points to a pointer to the configuration structure */
1112
1113 u_cfg = *((RF_Config_t **) data);
1114 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1115 if (k_cfg == NULL) {
1116 return (ENOMEM);
1117 }
1118 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1119 if (retcode) {
1120 RF_Free(k_cfg, sizeof(RF_Config_t));
1121 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1122 retcode));
1123 return (retcode);
1124 }
1125 goto config;
1126 config:
1127 /* allocate a buffer for the layout-specific data, and copy it
1128 * in */
1129 if (k_cfg->layoutSpecificSize) {
1130 if (k_cfg->layoutSpecificSize > 10000) {
1131 /* sanity check */
1132 RF_Free(k_cfg, sizeof(RF_Config_t));
1133 return (EINVAL);
1134 }
1135 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1136 (u_char *));
1137 if (specific_buf == NULL) {
1138 RF_Free(k_cfg, sizeof(RF_Config_t));
1139 return (ENOMEM);
1140 }
1141 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142 k_cfg->layoutSpecificSize);
1143 if (retcode) {
1144 RF_Free(k_cfg, sizeof(RF_Config_t));
1145 RF_Free(specific_buf,
1146 k_cfg->layoutSpecificSize);
1147 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1148 retcode));
1149 return (retcode);
1150 }
1151 } else
1152 specific_buf = NULL;
1153 k_cfg->layoutSpecific = specific_buf;
1154
1155 /* should do some kind of sanity check on the configuration.
1156 * Store the sum of all the bytes in the last byte? */
1157
1158 /* configure the system */
1159
1160 /*
1161 * Clear the entire RAID descriptor, just to make sure
1162 * there is no stale data left in the case of a
1163 * reconfiguration
1164 */
1165 memset(raidPtr, 0, sizeof(*raidPtr));
1166 raidPtr->raidid = unit;
1167
1168 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1169
1170 if (retcode == 0) {
1171
1172 /* allow this many simultaneous IO's to
1173 this RAID device */
1174 raidPtr->openings = RAIDOUTSTANDING;
1175
1176 raidinit(raidPtr);
1177 rf_markalldirty(raidPtr);
1178 }
1179 /* free the buffers. No return code here. */
1180 if (k_cfg->layoutSpecificSize) {
1181 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1182 }
1183 RF_Free(k_cfg, sizeof(RF_Config_t));
1184
1185 return (retcode);
1186
1187 /* shutdown the system */
1188 case RAIDFRAME_SHUTDOWN:
1189
1190 part = DISKPART(dev);
1191 pmask = (1 << part);
1192
1193 if ((error = raidlock(rs)) != 0)
1194 return (error);
1195
1196 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1197 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1198 (rs->sc_dkdev.dk_copenmask & pmask)))
1199 retcode = EBUSY;
1200 else {
1201 rs->sc_flags |= RAIDF_SHUTDOWN;
1202 rs->sc_dkdev.dk_copenmask &= ~pmask;
1203 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1204 rs->sc_dkdev.dk_openmask &= ~pmask;
1205 retcode = 0;
1206 }
1207
1208 raidunlock(rs);
1209
1210 if (retcode != 0)
1211 return retcode;
1212
1213 /* free the pseudo device attach bits */
1214
1215 cf = device_cfdata(rs->sc_dev);
1216 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1217 free(cf, M_RAIDFRAME);
1218
1219 return (retcode);
1220 case RAIDFRAME_GET_COMPONENT_LABEL:
1221 clabel_ptr = (RF_ComponentLabel_t **) data;
1222 /* need to read the component label for the disk indicated
1223 by row,column in clabel */
1224
1225 /*
1226 * Perhaps there should be an option to skip the in-core
1227 * copy and hit the disk, as with disklabel(8).
1228 */
1229 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1230
1231 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1232
1233 if (retcode) {
1234 RF_Free(clabel, sizeof(*clabel));
1235 return retcode;
1236 }
1237
1238 clabel->row = 0; /* Don't allow looking at anything else.*/
1239
1240 column = clabel->column;
1241
1242 if ((column < 0) || (column >= raidPtr->numCol +
1243 raidPtr->numSpare)) {
1244 RF_Free(clabel, sizeof(*clabel));
1245 return EINVAL;
1246 }
1247
1248 RF_Free(clabel, sizeof(*clabel));
1249
1250 clabel = raidget_component_label(raidPtr, column);
1251
1252 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1253
1254 #if 0
1255 case RAIDFRAME_SET_COMPONENT_LABEL:
1256 clabel = (RF_ComponentLabel_t *) data;
1257
1258 /* XXX check the label for valid stuff... */
1259 /* Note that some things *should not* get modified --
1260 the user should be re-initing the labels instead of
1261 trying to patch things.
1262 */
1263
1264 raidid = raidPtr->raidid;
1265 #ifdef DEBUG
1266 printf("raid%d: Got component label:\n", raidid);
1267 printf("raid%d: Version: %d\n", raidid, clabel->version);
1268 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1269 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1270 printf("raid%d: Column: %d\n", raidid, clabel->column);
1271 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1272 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1273 printf("raid%d: Status: %d\n", raidid, clabel->status);
1274 #endif
1275 clabel->row = 0;
1276 column = clabel->column;
1277
1278 if ((column < 0) || (column >= raidPtr->numCol)) {
1279 return(EINVAL);
1280 }
1281
1282 /* XXX this isn't allowed to do anything for now :-) */
1283
1284 /* XXX and before it is, we need to fill in the rest
1285 of the fields!?!?!?! */
1286 memcpy(raidget_component_label(raidPtr, column),
1287 clabel, sizeof(*clabel));
1288 raidflush_component_label(raidPtr, column);
1289 return (0);
1290 #endif
1291
1292 case RAIDFRAME_INIT_LABELS:
1293 clabel = (RF_ComponentLabel_t *) data;
1294 /*
1295 we only want the serial number from
1296 the above. We get all the rest of the information
1297 from the config that was used to create this RAID
1298 set.
1299 */
1300
1301 raidPtr->serial_number = clabel->serial_number;
1302
1303 for(column=0;column<raidPtr->numCol;column++) {
1304 diskPtr = &raidPtr->Disks[column];
1305 if (!RF_DEAD_DISK(diskPtr->status)) {
1306 ci_label = raidget_component_label(raidPtr,
1307 column);
1308 /* Zeroing this is important. */
1309 memset(ci_label, 0, sizeof(*ci_label));
1310 raid_init_component_label(raidPtr, ci_label);
1311 ci_label->serial_number =
1312 raidPtr->serial_number;
1313 ci_label->row = 0; /* we dont' pretend to support more */
1314 rf_component_label_set_partitionsize(ci_label,
1315 diskPtr->partitionSize);
1316 ci_label->column = column;
1317 raidflush_component_label(raidPtr, column);
1318 }
1319 /* XXXjld what about the spares? */
1320 }
1321
1322 return (retcode);
1323 case RAIDFRAME_SET_AUTOCONFIG:
1324 d = rf_set_autoconfig(raidPtr, *(int *) data);
1325 printf("raid%d: New autoconfig value is: %d\n",
1326 raidPtr->raidid, d);
1327 *(int *) data = d;
1328 return (retcode);
1329
1330 case RAIDFRAME_SET_ROOT:
1331 d = rf_set_rootpartition(raidPtr, *(int *) data);
1332 printf("raid%d: New rootpartition value is: %d\n",
1333 raidPtr->raidid, d);
1334 *(int *) data = d;
1335 return (retcode);
1336
1337 /* initialize all parity */
1338 case RAIDFRAME_REWRITEPARITY:
1339
1340 if (raidPtr->Layout.map->faultsTolerated == 0) {
1341 /* Parity for RAID 0 is trivially correct */
1342 raidPtr->parity_good = RF_RAID_CLEAN;
1343 return(0);
1344 }
1345
1346 if (raidPtr->parity_rewrite_in_progress == 1) {
1347 /* Re-write is already in progress! */
1348 return(EINVAL);
1349 }
1350
1351 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1352 rf_RewriteParityThread,
1353 raidPtr,"raid_parity");
1354 return (retcode);
1355
1356
1357 case RAIDFRAME_ADD_HOT_SPARE:
1358 sparePtr = (RF_SingleComponent_t *) data;
1359 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1360 retcode = rf_add_hot_spare(raidPtr, &component);
1361 return(retcode);
1362
1363 case RAIDFRAME_REMOVE_HOT_SPARE:
1364 return(retcode);
1365
1366 case RAIDFRAME_DELETE_COMPONENT:
1367 componentPtr = (RF_SingleComponent_t *)data;
1368 memcpy( &component, componentPtr,
1369 sizeof(RF_SingleComponent_t));
1370 retcode = rf_delete_component(raidPtr, &component);
1371 return(retcode);
1372
1373 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1374 componentPtr = (RF_SingleComponent_t *)data;
1375 memcpy( &component, componentPtr,
1376 sizeof(RF_SingleComponent_t));
1377 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1378 return(retcode);
1379
1380 case RAIDFRAME_REBUILD_IN_PLACE:
1381
1382 if (raidPtr->Layout.map->faultsTolerated == 0) {
1383 /* Can't do this on a RAID 0!! */
1384 return(EINVAL);
1385 }
1386
1387 if (raidPtr->recon_in_progress == 1) {
1388 /* a reconstruct is already in progress! */
1389 return(EINVAL);
1390 }
1391
1392 componentPtr = (RF_SingleComponent_t *) data;
1393 memcpy( &component, componentPtr,
1394 sizeof(RF_SingleComponent_t));
1395 component.row = 0; /* we don't support any more */
1396 column = component.column;
1397
1398 if ((column < 0) || (column >= raidPtr->numCol)) {
1399 return(EINVAL);
1400 }
1401
1402 rf_lock_mutex2(raidPtr->mutex);
1403 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1404 (raidPtr->numFailures > 0)) {
1405 /* XXX 0 above shouldn't be constant!!! */
1406 /* some component other than this has failed.
1407 Let's not make things worse than they already
1408 are... */
1409 printf("raid%d: Unable to reconstruct to disk at:\n",
1410 raidPtr->raidid);
1411 printf("raid%d: Col: %d Too many failures.\n",
1412 raidPtr->raidid, column);
1413 rf_unlock_mutex2(raidPtr->mutex);
1414 return (EINVAL);
1415 }
1416 if (raidPtr->Disks[column].status ==
1417 rf_ds_reconstructing) {
1418 printf("raid%d: Unable to reconstruct to disk at:\n",
1419 raidPtr->raidid);
1420 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1421
1422 rf_unlock_mutex2(raidPtr->mutex);
1423 return (EINVAL);
1424 }
1425 if (raidPtr->Disks[column].status == rf_ds_spared) {
1426 rf_unlock_mutex2(raidPtr->mutex);
1427 return (EINVAL);
1428 }
1429 rf_unlock_mutex2(raidPtr->mutex);
1430
1431 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1432 if (rrcopy == NULL)
1433 return(ENOMEM);
1434
1435 rrcopy->raidPtr = (void *) raidPtr;
1436 rrcopy->col = column;
1437
1438 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1439 rf_ReconstructInPlaceThread,
1440 rrcopy,"raid_reconip");
1441 return(retcode);
1442
1443 case RAIDFRAME_GET_INFO:
1444 if (!raidPtr->valid)
1445 return (ENODEV);
1446 ucfgp = (RF_DeviceConfig_t **) data;
1447 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1448 (RF_DeviceConfig_t *));
1449 if (d_cfg == NULL)
1450 return (ENOMEM);
1451 d_cfg->rows = 1; /* there is only 1 row now */
1452 d_cfg->cols = raidPtr->numCol;
1453 d_cfg->ndevs = raidPtr->numCol;
1454 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1455 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1456 return (ENOMEM);
1457 }
1458 d_cfg->nspares = raidPtr->numSpare;
1459 if (d_cfg->nspares >= RF_MAX_DISKS) {
1460 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1461 return (ENOMEM);
1462 }
1463 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1464 d = 0;
1465 for (j = 0; j < d_cfg->cols; j++) {
1466 d_cfg->devs[d] = raidPtr->Disks[j];
1467 d++;
1468 }
1469 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1470 d_cfg->spares[i] = raidPtr->Disks[j];
1471 }
1472 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1473 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1474
1475 return (retcode);
1476
1477 case RAIDFRAME_CHECK_PARITY:
1478 *(int *) data = raidPtr->parity_good;
1479 return (0);
1480
1481 case RAIDFRAME_PARITYMAP_STATUS:
1482 if (rf_paritymap_ineligible(raidPtr))
1483 return EINVAL;
1484 rf_paritymap_status(raidPtr->parity_map,
1485 (struct rf_pmstat *)data);
1486 return 0;
1487
1488 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1489 if (rf_paritymap_ineligible(raidPtr))
1490 return EINVAL;
1491 if (raidPtr->parity_map == NULL)
1492 return ENOENT; /* ??? */
1493 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1494 (struct rf_pmparams *)data, 1))
1495 return EINVAL;
1496 return 0;
1497
1498 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1499 if (rf_paritymap_ineligible(raidPtr))
1500 return EINVAL;
1501 *(int *) data = rf_paritymap_get_disable(raidPtr);
1502 return 0;
1503
1504 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1505 if (rf_paritymap_ineligible(raidPtr))
1506 return EINVAL;
1507 rf_paritymap_set_disable(raidPtr, *(int *)data);
1508 /* XXX should errors be passed up? */
1509 return 0;
1510
1511 case RAIDFRAME_RESET_ACCTOTALS:
1512 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1513 return (0);
1514
1515 case RAIDFRAME_GET_ACCTOTALS:
1516 totals = (RF_AccTotals_t *) data;
1517 *totals = raidPtr->acc_totals;
1518 return (0);
1519
1520 case RAIDFRAME_KEEP_ACCTOTALS:
1521 raidPtr->keep_acc_totals = *(int *)data;
1522 return (0);
1523
1524 case RAIDFRAME_GET_SIZE:
1525 *(int *) data = raidPtr->totalSectors;
1526 return (0);
1527
1528 /* fail a disk & optionally start reconstruction */
1529 case RAIDFRAME_FAIL_DISK:
1530
1531 if (raidPtr->Layout.map->faultsTolerated == 0) {
1532 /* Can't do this on a RAID 0!! */
1533 return(EINVAL);
1534 }
1535
1536 rr = (struct rf_recon_req *) data;
1537 rr->row = 0;
1538 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1539 return (EINVAL);
1540
1541
1542 rf_lock_mutex2(raidPtr->mutex);
1543 if (raidPtr->status == rf_rs_reconstructing) {
1544 /* you can't fail a disk while we're reconstructing! */
1545 /* XXX wrong for RAID6 */
1546 rf_unlock_mutex2(raidPtr->mutex);
1547 return (EINVAL);
1548 }
1549 if ((raidPtr->Disks[rr->col].status ==
1550 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1551 /* some other component has failed. Let's not make
1552 things worse. XXX wrong for RAID6 */
1553 rf_unlock_mutex2(raidPtr->mutex);
1554 return (EINVAL);
1555 }
1556 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1557 /* Can't fail a spared disk! */
1558 rf_unlock_mutex2(raidPtr->mutex);
1559 return (EINVAL);
1560 }
1561 rf_unlock_mutex2(raidPtr->mutex);
1562
1563 /* make a copy of the recon request so that we don't rely on
1564 * the user's buffer */
1565 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1566 if (rrcopy == NULL)
1567 return(ENOMEM);
1568 memcpy(rrcopy, rr, sizeof(*rr));
1569 rrcopy->raidPtr = (void *) raidPtr;
1570
1571 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1572 rf_ReconThread,
1573 rrcopy,"raid_recon");
1574 return (0);
1575
1576 /* invoke a copyback operation after recon on whatever disk
1577 * needs it, if any */
1578 case RAIDFRAME_COPYBACK:
1579
1580 if (raidPtr->Layout.map->faultsTolerated == 0) {
1581 /* This makes no sense on a RAID 0!! */
1582 return(EINVAL);
1583 }
1584
1585 if (raidPtr->copyback_in_progress == 1) {
1586 /* Copyback is already in progress! */
1587 return(EINVAL);
1588 }
1589
1590 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1591 rf_CopybackThread,
1592 raidPtr,"raid_copyback");
1593 return (retcode);
1594
1595 /* return the percentage completion of reconstruction */
1596 case RAIDFRAME_CHECK_RECON_STATUS:
1597 if (raidPtr->Layout.map->faultsTolerated == 0) {
1598 /* This makes no sense on a RAID 0, so tell the
1599 user it's done. */
1600 *(int *) data = 100;
1601 return(0);
1602 }
1603 if (raidPtr->status != rf_rs_reconstructing)
1604 *(int *) data = 100;
1605 else {
1606 if (raidPtr->reconControl->numRUsTotal > 0) {
1607 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1608 } else {
1609 *(int *) data = 0;
1610 }
1611 }
1612 return (0);
1613 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1614 progressInfoPtr = (RF_ProgressInfo_t **) data;
1615 if (raidPtr->status != rf_rs_reconstructing) {
1616 progressInfo.remaining = 0;
1617 progressInfo.completed = 100;
1618 progressInfo.total = 100;
1619 } else {
1620 progressInfo.total =
1621 raidPtr->reconControl->numRUsTotal;
1622 progressInfo.completed =
1623 raidPtr->reconControl->numRUsComplete;
1624 progressInfo.remaining = progressInfo.total -
1625 progressInfo.completed;
1626 }
1627 retcode = copyout(&progressInfo, *progressInfoPtr,
1628 sizeof(RF_ProgressInfo_t));
1629 return (retcode);
1630
1631 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1632 if (raidPtr->Layout.map->faultsTolerated == 0) {
1633 /* This makes no sense on a RAID 0, so tell the
1634 user it's done. */
1635 *(int *) data = 100;
1636 return(0);
1637 }
1638 if (raidPtr->parity_rewrite_in_progress == 1) {
1639 *(int *) data = 100 *
1640 raidPtr->parity_rewrite_stripes_done /
1641 raidPtr->Layout.numStripe;
1642 } else {
1643 *(int *) data = 100;
1644 }
1645 return (0);
1646
1647 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1648 progressInfoPtr = (RF_ProgressInfo_t **) data;
1649 if (raidPtr->parity_rewrite_in_progress == 1) {
1650 progressInfo.total = raidPtr->Layout.numStripe;
1651 progressInfo.completed =
1652 raidPtr->parity_rewrite_stripes_done;
1653 progressInfo.remaining = progressInfo.total -
1654 progressInfo.completed;
1655 } else {
1656 progressInfo.remaining = 0;
1657 progressInfo.completed = 100;
1658 progressInfo.total = 100;
1659 }
1660 retcode = copyout(&progressInfo, *progressInfoPtr,
1661 sizeof(RF_ProgressInfo_t));
1662 return (retcode);
1663
1664 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1665 if (raidPtr->Layout.map->faultsTolerated == 0) {
1666 /* This makes no sense on a RAID 0 */
1667 *(int *) data = 100;
1668 return(0);
1669 }
1670 if (raidPtr->copyback_in_progress == 1) {
1671 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1672 raidPtr->Layout.numStripe;
1673 } else {
1674 *(int *) data = 100;
1675 }
1676 return (0);
1677
1678 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1679 progressInfoPtr = (RF_ProgressInfo_t **) data;
1680 if (raidPtr->copyback_in_progress == 1) {
1681 progressInfo.total = raidPtr->Layout.numStripe;
1682 progressInfo.completed =
1683 raidPtr->copyback_stripes_done;
1684 progressInfo.remaining = progressInfo.total -
1685 progressInfo.completed;
1686 } else {
1687 progressInfo.remaining = 0;
1688 progressInfo.completed = 100;
1689 progressInfo.total = 100;
1690 }
1691 retcode = copyout(&progressInfo, *progressInfoPtr,
1692 sizeof(RF_ProgressInfo_t));
1693 return (retcode);
1694
1695 /* the sparetable daemon calls this to wait for the kernel to
1696 * need a spare table. this ioctl does not return until a
1697 * spare table is needed. XXX -- calling mpsleep here in the
1698 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1699 * -- I should either compute the spare table in the kernel,
1700 * or have a different -- XXX XXX -- interface (a different
1701 * character device) for delivering the table -- XXX */
1702 #if 0
1703 case RAIDFRAME_SPARET_WAIT:
1704 rf_lock_mutex2(rf_sparet_wait_mutex);
1705 while (!rf_sparet_wait_queue)
1706 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1707 waitreq = rf_sparet_wait_queue;
1708 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1709 rf_unlock_mutex2(rf_sparet_wait_mutex);
1710
1711 /* structure assignment */
1712 *((RF_SparetWait_t *) data) = *waitreq;
1713
1714 RF_Free(waitreq, sizeof(*waitreq));
1715 return (0);
1716
1717 /* wakes up a process waiting on SPARET_WAIT and puts an error
1718 * code in it that will cause the dameon to exit */
1719 case RAIDFRAME_ABORT_SPARET_WAIT:
1720 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1721 waitreq->fcol = -1;
1722 rf_lock_mutex2(rf_sparet_wait_mutex);
1723 waitreq->next = rf_sparet_wait_queue;
1724 rf_sparet_wait_queue = waitreq;
1725 rf_broadcast_conf2(rf_sparet_wait_cv);
1726 rf_unlock_mutex2(rf_sparet_wait_mutex);
1727 return (0);
1728
1729 /* used by the spare table daemon to deliver a spare table
1730 * into the kernel */
1731 case RAIDFRAME_SEND_SPARET:
1732
1733 /* install the spare table */
1734 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1735
1736 /* respond to the requestor. the return status of the spare
1737 * table installation is passed in the "fcol" field */
1738 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1739 waitreq->fcol = retcode;
1740 rf_lock_mutex2(rf_sparet_wait_mutex);
1741 waitreq->next = rf_sparet_resp_queue;
1742 rf_sparet_resp_queue = waitreq;
1743 rf_broadcast_cond2(rf_sparet_resp_cv);
1744 rf_unlock_mutex2(rf_sparet_wait_mutex);
1745
1746 return (retcode);
1747 #endif
1748
1749 default:
1750 break; /* fall through to the os-specific code below */
1751
1752 }
1753
1754 if (!raidPtr->valid)
1755 return (EINVAL);
1756
1757 /*
1758 * Add support for "regular" device ioctls here.
1759 */
1760
1761 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1762 if (error != EPASSTHROUGH)
1763 return (error);
1764
1765 switch (cmd) {
1766 case DIOCGDINFO:
1767 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1768 break;
1769 #ifdef __HAVE_OLD_DISKLABEL
1770 case ODIOCGDINFO:
1771 newlabel = *(rs->sc_dkdev.dk_label);
1772 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1773 return ENOTTY;
1774 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1775 break;
1776 #endif
1777
1778 case DIOCGPART:
1779 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1780 ((struct partinfo *) data)->part =
1781 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1782 break;
1783
1784 case DIOCWDINFO:
1785 case DIOCSDINFO:
1786 #ifdef __HAVE_OLD_DISKLABEL
1787 case ODIOCWDINFO:
1788 case ODIOCSDINFO:
1789 #endif
1790 {
1791 struct disklabel *lp;
1792 #ifdef __HAVE_OLD_DISKLABEL
1793 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1794 memset(&newlabel, 0, sizeof newlabel);
1795 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1796 lp = &newlabel;
1797 } else
1798 #endif
1799 lp = (struct disklabel *)data;
1800
1801 if ((error = raidlock(rs)) != 0)
1802 return (error);
1803
1804 rs->sc_flags |= RAIDF_LABELLING;
1805
1806 error = setdisklabel(rs->sc_dkdev.dk_label,
1807 lp, 0, rs->sc_dkdev.dk_cpulabel);
1808 if (error == 0) {
1809 if (cmd == DIOCWDINFO
1810 #ifdef __HAVE_OLD_DISKLABEL
1811 || cmd == ODIOCWDINFO
1812 #endif
1813 )
1814 error = writedisklabel(RAIDLABELDEV(dev),
1815 raidstrategy, rs->sc_dkdev.dk_label,
1816 rs->sc_dkdev.dk_cpulabel);
1817 }
1818 rs->sc_flags &= ~RAIDF_LABELLING;
1819
1820 raidunlock(rs);
1821
1822 if (error)
1823 return (error);
1824 break;
1825 }
1826
1827 case DIOCWLABEL:
1828 if (*(int *) data != 0)
1829 rs->sc_flags |= RAIDF_WLABEL;
1830 else
1831 rs->sc_flags &= ~RAIDF_WLABEL;
1832 break;
1833
1834 case DIOCGDEFLABEL:
1835 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1836 break;
1837
1838 #ifdef __HAVE_OLD_DISKLABEL
1839 case ODIOCGDEFLABEL:
1840 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1841 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1842 return ENOTTY;
1843 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1844 break;
1845 #endif
1846
1847 case DIOCAWEDGE:
1848 case DIOCDWEDGE:
1849 dkw = (void *)data;
1850
1851 /* If the ioctl happens here, the parent is us. */
1852 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1853 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1854
1855 case DIOCLWEDGES:
1856 return dkwedge_list(&rs->sc_dkdev,
1857 (struct dkwedge_list *)data, l);
1858 case DIOCCACHESYNC:
1859 return rf_sync_component_caches(raidPtr);
1860
1861 case DIOCGSTRATEGY:
1862 {
1863 struct disk_strategy *dks = (void *)data;
1864
1865 s = splbio();
1866 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1867 sizeof(dks->dks_name));
1868 splx(s);
1869 dks->dks_paramlen = 0;
1870
1871 return 0;
1872 }
1873
1874 case DIOCSSTRATEGY:
1875 {
1876 struct disk_strategy *dks = (void *)data;
1877 struct bufq_state *new;
1878 struct bufq_state *old;
1879
1880 if (dks->dks_param != NULL) {
1881 return EINVAL;
1882 }
1883 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1884 error = bufq_alloc(&new, dks->dks_name,
1885 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1886 if (error) {
1887 return error;
1888 }
1889 s = splbio();
1890 old = rs->buf_queue;
1891 bufq_move(new, old);
1892 rs->buf_queue = new;
1893 splx(s);
1894 bufq_free(old);
1895
1896 return 0;
1897 }
1898
1899 default:
1900 retcode = ENOTTY;
1901 }
1902 return (retcode);
1903
1904 }
1905
1906
1907 /* raidinit -- complete the rest of the initialization for the
1908 RAIDframe device. */
1909
1910
1911 static void
1912 raidinit(RF_Raid_t *raidPtr)
1913 {
1914 cfdata_t cf;
1915 struct raid_softc *rs;
1916 int unit;
1917
1918 unit = raidPtr->raidid;
1919
1920 rs = &raid_softc[unit];
1921
1922 /* XXX should check return code first... */
1923 rs->sc_flags |= RAIDF_INITED;
1924
1925 /* XXX doesn't check bounds. */
1926 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1927
1928 /* attach the pseudo device */
1929 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1930 cf->cf_name = raid_cd.cd_name;
1931 cf->cf_atname = raid_cd.cd_name;
1932 cf->cf_unit = unit;
1933 cf->cf_fstate = FSTATE_STAR;
1934
1935 rs->sc_dev = config_attach_pseudo(cf);
1936
1937 if (rs->sc_dev == NULL) {
1938 printf("raid%d: config_attach_pseudo failed\n",
1939 raidPtr->raidid);
1940 rs->sc_flags &= ~RAIDF_INITED;
1941 free(cf, M_RAIDFRAME);
1942 return;
1943 }
1944
1945 /* disk_attach actually creates space for the CPU disklabel, among
1946 * other things, so it's critical to call this *BEFORE* we try putzing
1947 * with disklabels. */
1948
1949 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1950 disk_attach(&rs->sc_dkdev);
1951 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1952
1953 /* XXX There may be a weird interaction here between this, and
1954 * protectedSectors, as used in RAIDframe. */
1955
1956 rs->sc_size = raidPtr->totalSectors;
1957
1958 dkwedge_discover(&rs->sc_dkdev);
1959
1960 rf_set_properties(rs, raidPtr);
1961
1962 }
1963 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
1973 int
1974 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1975 {
1976 int retcode;
1977
1978 rf_lock_mutex2(rf_sparet_wait_mutex);
1979 req->next = rf_sparet_wait_queue;
1980 rf_sparet_wait_queue = req;
1981 rf_broadcast_cond2(rf_sparet_wait_cv);
1982
1983 /* mpsleep unlocks the mutex */
1984 while (!rf_sparet_resp_queue) {
1985 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1986 }
1987 req = rf_sparet_resp_queue;
1988 rf_sparet_resp_queue = req->next;
1989 rf_unlock_mutex2(rf_sparet_wait_mutex);
1990
1991 retcode = req->fcol;
1992 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1993 * alloc'd */
1994 return (retcode);
1995 }
1996 #endif
1997
1998 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1999 * bp & passes it down.
2000 * any calls originating in the kernel must use non-blocking I/O
2001 * do some extra sanity checking to return "appropriate" error values for
2002 * certain conditions (to make some standard utilities work)
2003 *
2004 * Formerly known as: rf_DoAccessKernel
2005 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex while the labels are rewritten, then
		 * retake it before touching the counter again. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Lock discipline: raidPtr->mutex is held each time the loop
	 * condition is evaluated, released for the body, and retaken
	 * before every "continue" and at the bottom of the loop. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* Note: returns with the mutex NOT held. */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to this array's sector
		 * size, then add the partition offset (RAW_PART has
		 * no offset). */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb accounts for a trailing partial sector; the (sum <
		 * ...) comparisons below catch arithmetic wrap-around. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			/* Request runs off the end of the array (or
			 * overflowed): fail the buffer and move on. */
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one of the allowed outstanding I/O slots. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* Hand the request to the RAIDframe engine as a
		 * non-blocking DAG access; completion happens via the
		 * bp passed in here. */
		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* NOTE(review): on this failure path the buffer
			 * is completed here, but openings is not
			 * restored and no matching disk_unbusy() is
			 * visible in this function -- verify that the
			 * accounting is balanced elsewhere. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2126
2127
2128
2129
2130 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2131
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map the RAIDframe request type onto a buf-layer operation. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember which queue this request belongs to so the
	 * completion callback can find it. */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* (The doubled parentheses below are harmless: the
		 * argument is just a parenthesized expression.) */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* Fake an immediate completion: no real I/O is issued. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in the buf for the component device; completion
		 * is delivered through KernelWakeupFunc with req as the
		 * private cookie. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* Always reports success; errors surface via the completion path. */
	return (0);
}
/*
 * Callback (buf iodone handler) associated with an I/O invoked from
 * kernel code via rf_DispatchKernelIO()/InitBP().
 *
 * Runs at biodone time: records tracing info, marks the component as
 * failed on I/O error (once, and only if the set can tolerate the
 * failure), then moves the request to the raidPtr's "iodone" queue and
 * wakes the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2273
2274
/*
 * Initialize a buf structure for doing an I/O in the kernel.
 *
 * rw_flag is B_READ or B_WRITE; startSect/numSect are in units of the
 * array's sectors (logBytesPerSector bytes each); cbFunc/cbArg become
 * the buf's iodone handler and b_private.  Panics if the resulting
 * byte count is zero.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert array sectors -> bytes -> DEV_BSIZE blocks */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2302
2303 static void
2304 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2305 struct disklabel *lp)
2306 {
2307 memset(lp, 0, sizeof(*lp));
2308
2309 /* fabricate a label... */
2310 lp->d_secperunit = raidPtr->totalSectors;
2311 lp->d_secsize = raidPtr->bytesPerSector;
2312 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2313 lp->d_ntracks = 4 * raidPtr->numCol;
2314 lp->d_ncylinders = raidPtr->totalSectors /
2315 (lp->d_nsectors * lp->d_ntracks);
2316 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2317
2318 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2319 lp->d_type = DTYPE_RAID;
2320 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2321 lp->d_rpm = 3600;
2322 lp->d_interleave = 1;
2323 lp->d_flags = 0;
2324
2325 lp->d_partitions[RAW_PART].p_offset = 0;
2326 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2327 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2328 lp->d_npartitions = RAW_PART + 1;
2329
2330 lp->d_magic = DISKMAGIC;
2331 lp->d_magic2 = DISKMAGIC;
2332 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2333
2334 }
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.  Also sanity-checks a found label against the current raid size,
 * warning (but not failing) on mismatches.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated label; readdisklabel() may replace it */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present: fix up the default label already
 * installed by raidgetdefaultlabel() and recompute its checksum.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock on the softc.
 *
 * Returns 0 on success, or the tsleep() error (e.g. EINTR on signal)
 * without acquiring the lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int error;

	/* sleep until the current holder drops RAIDF_LOCKED */
	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
2433 /*
2434 * Unlock and wake up any waiters.
2435 */
2436 static void
2437 raidunlock(struct raid_softc *rs)
2438 {
2439
2440 rs->sc_flags &= ~RAIDF_LOCKED;
2441 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2442 rs->sc_flags &= ~RAIDF_WANTED;
2443 wakeup(rs);
2444 }
2445 }
2446
2447
2448 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2449 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2450 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2451
/* Byte offset of the component-label area on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2458
2459 static daddr_t
2460 rf_component_info_size(unsigned secsize)
2461 {
2462 daddr_t info_size;
2463
2464 KASSERT(secsize);
2465 if (secsize > RF_COMPONENT_INFO_SIZE)
2466 info_size = secsize;
2467 else
2468 info_size = RF_COMPONENT_INFO_SIZE;
2469
2470 return info_size;
2471 }
2472
2473 static daddr_t
2474 rf_parity_map_offset(RF_Raid_t *raidPtr)
2475 {
2476 daddr_t map_offset;
2477
2478 KASSERT(raidPtr->bytesPerSector);
2479 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2480 map_offset = raidPtr->bytesPerSector;
2481 else
2482 map_offset = RF_COMPONENT_INFO_SIZE;
2483 map_offset += rf_component_info_offset();
2484
2485 return map_offset;
2486 }
2487
2488 static daddr_t
2489 rf_parity_map_size(RF_Raid_t *raidPtr)
2490 {
2491 daddr_t map_size;
2492
2493 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2494 map_size = raidPtr->bytesPerSector;
2495 else
2496 map_size = RF_PARITY_MAP_SIZE;
2497
2498 return map_size;
2499 }
2500
2501 int
2502 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2503 {
2504 RF_ComponentLabel_t *clabel;
2505
2506 clabel = raidget_component_label(raidPtr, col);
2507 clabel->clean = RF_RAID_CLEAN;
2508 raidflush_component_label(raidPtr, col);
2509 return(0);
2510 }
2511
2512
2513 int
2514 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2515 {
2516 RF_ComponentLabel_t *clabel;
2517
2518 clabel = raidget_component_label(raidPtr, col);
2519 clabel->clean = RF_RAID_DIRTY;
2520 raidflush_component_label(raidPtr, col);
2521 return(0);
2522 }
2523
/*
 * Read component `col's label from disk into its in-core copy.
 * Returns 0 on success or an errno value.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2533
/* Return a pointer to component `col's in-core label (never NULL). */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2539
/*
 * Write component `col's in-core label to disk, first stamping it with
 * the array's current mod_counter (and parity-map modcount, unless
 * parity maps are compiled out).  Returns 0 on success or an errno.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2554
2555
/*
 * Read a component label from the component-label area of the given
 * device into *clabel.  Returns 0 on success or an errno value.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2565
2566 /* ARGSUSED */
2567 static int
2568 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2569 size_t msize, daddr_t offset, daddr_t dsize)
2570 {
2571 struct buf *bp;
2572 const struct bdevsw *bdev;
2573 int error;
2574
2575 /* XXX should probably ensure that we don't try to do this if
2576 someone has changed rf_protected_sectors. */
2577
2578 if (b_vp == NULL) {
2579 /* For whatever reason, this component is not valid.
2580 Don't try to read a component label from it. */
2581 return(EINVAL);
2582 }
2583
2584 /* get a block of the appropriate size... */
2585 bp = geteblk((int)dsize);
2586 bp->b_dev = dev;
2587
2588 /* get our ducks in a row for the read */
2589 bp->b_blkno = offset / DEV_BSIZE;
2590 bp->b_bcount = dsize;
2591 bp->b_flags |= B_READ;
2592 bp->b_resid = dsize;
2593
2594 bdev = bdevsw_lookup(bp->b_dev);
2595 if (bdev == NULL)
2596 return (ENXIO);
2597 (*bdev->d_strategy)(bp);
2598
2599 error = biowait(bp);
2600
2601 if (!error) {
2602 memcpy(data, bp->b_data, msize);
2603 }
2604
2605 brelse(bp, 0);
2606 return(error);
2607 }
2608
2609
/*
 * Write *clabel to the component-label area of the given device
 * (synchronously).  Returns 0 on success or an errno value.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2619
2620 /* ARGSUSED */
2621 static int
2622 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2623 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2624 {
2625 struct buf *bp;
2626 const struct bdevsw *bdev;
2627 int error;
2628
2629 /* get a block of the appropriate size... */
2630 bp = geteblk((int)dsize);
2631 bp->b_dev = dev;
2632
2633 /* get our ducks in a row for the write */
2634 bp->b_blkno = offset / DEV_BSIZE;
2635 bp->b_bcount = dsize;
2636 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2637 bp->b_resid = dsize;
2638
2639 memset(bp->b_data, 0, dsize);
2640 memcpy(bp->b_data, data, msize);
2641
2642 bdev = bdevsw_lookup(bp->b_dev);
2643 if (bdev == NULL)
2644 return (ENXIO);
2645 (*bdev->d_strategy)(bp);
2646 if (asyncp)
2647 return 0;
2648 error = biowait(bp);
2649 brelse(bp, 0);
2650 if (error) {
2651 #if 1
2652 printf("Failed to write RAID component info!\n");
2653 #endif
2654 }
2655
2656 return(error);
2657 }
2658
/*
 * Write the parity map *map to the parity-map area of every live
 * component of the array.  Dead disks are skipped; per-component
 * write errors are currently ignored (see XXXjld below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2676
/*
 * Read the parity map from every live component and merge the copies
 * into *map: the first readable copy initializes *map, subsequent
 * copies are folded in with rf_paritymap_merge().
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2701
/*
 * Bump the array's mod_counter and mark the component labels of all
 * live components (and all in-use spares) dirty on disk.  Failed
 * disks and spared components are left untouched.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2761
2762
/*
 * Bump the array's mod_counter and rewrite the component labels of all
 * optimal components and in-use spares.  If this is the final update
 * (RF_FINAL_COMPONENT_UPDATE) and parity is known good, the components
 * are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2837
/*
 * Close the vnode of a component.  Auto-configured components were
 * opened internally via VOP_OPEN and are closed the same way; others
 * were opened through vn_open by the configuration ioctl path and are
 * closed with vn_close using the current lwp's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}
2853
2854
2855 void
2856 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2857 {
2858 int r,c;
2859 struct vnode *vp;
2860 int acd;
2861
2862
2863 /* We take this opportunity to close the vnodes like we should.. */
2864
2865 for (c = 0; c < raidPtr->numCol; c++) {
2866 vp = raidPtr->raid_cinfo[c].ci_vp;
2867 acd = raidPtr->Disks[c].auto_configured;
2868 rf_close_component(raidPtr, vp, acd);
2869 raidPtr->raid_cinfo[c].ci_vp = NULL;
2870 raidPtr->Disks[c].auto_configured = 0;
2871 }
2872
2873 for (r = 0; r < raidPtr->numSpare; r++) {
2874 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2875 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2876 rf_close_component(raidPtr, vp, acd);
2877 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2878 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2879 }
2880 }
2881
2882
/*
 * Kernel-thread body for failing a disk and (optionally) starting a
 * reconstruction onto a spare.  Frees the request and exits; never
 * returns.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* fail the disk; RF_FDFLAGS_RECON requests reconstruction too */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2904
/*
 * Kernel-thread body for rewriting all parity on the array.  On
 * success marks parity good; on failure just logs.  Wakes anyone
 * waiting in shutdown for the rewrite to finish.  Never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2935
2936
/*
 * Kernel-thread body for copying reconstructed data from a spare back
 * to a replaced component.  Never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2951
2952
/*
 * Kernel-thread body for reconstructing a component in place (onto
 * the same column, e.g. after a disk replacement).  Frees the request
 * and exits; never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2970
/*
 * Try to read a component label from the device (dev, vp).  If a
 * reasonable label is found and fits the partition, prepend a new
 * RF_AutoConfig_t (which takes ownership of vp and the label) to
 * ac_list and return the new list head.  Otherwise the label is freed
 * and vp is closed/released.  On allocation failure the entire
 * ac_list is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free everything accumulated so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so release the label buffer
		   and close the component's vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3028
/*
 * Scan all disk devices in the system for RAIDframe components.
 *
 * For each disk-class device (excluding floppies, CDs, and md), open
 * the raw partition and look for components: on wedges (dk), check
 * the wedge type; otherwise check every FS_RAID partition in the
 * disklabel, falling back to the raw partition if no RAID partition
 * was found.  Candidate components are accumulated via
 * rf_get_component().  Returns the (possibly NULL) list head.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their type directly; no disklabel */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3210
3211
3212 int
3213 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3214 {
3215
3216 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3217 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3218 ((clabel->clean == RF_RAID_CLEAN) ||
3219 (clabel->clean == RF_RAID_DIRTY)) &&
3220 clabel->row >=0 &&
3221 clabel->column >= 0 &&
3222 clabel->num_rows > 0 &&
3223 clabel->num_columns > 0 &&
3224 clabel->row < clabel->num_rows &&
3225 clabel->column < clabel->num_columns &&
3226 clabel->blockSize > 0 &&
3227 /*
3228 * numBlocksHi may contain garbage, but it is ok since
3229 * the type is unsigned. If it is really garbage,
3230 * rf_fix_old_label_size() will fix it.
3231 */
3232 rf_component_label_numblocks(clabel) > 0) {
3233 /*
3234 * label looks reasonable enough...
3235 * let's make sure it has no old garbage.
3236 */
3237 if (numsecs)
3238 rf_fix_old_label_size(clabel, numsecs);
3239 return(1);
3240 }
3241 return(0);
3242 }
3243
3244
3245 /*
3246 * For reasons yet unknown, some old component labels have garbage in
3247 * the newer numBlocksHi region, and this causes lossage. Since those
3248 * disks will also have numsecs set to less than 32 bits of sectors,
3249 * we can determine when this corruption has occured, and fix it.
3250 *
3251 * The exact same problem, with the same unknown reason, happens to
3252 * the partitionSizeHi member as well.
3253 */
3254 static void
3255 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3256 {
3257
3258 if (numsecs < ((uint64_t)1 << 32)) {
3259 if (clabel->numBlocksHi) {
3260 printf("WARNING: total sectors < 32 bits, yet "
3261 "numBlocksHi set\n"
3262 "WARNING: resetting numBlocksHi to zero.\n");
3263 clabel->numBlocksHi = 0;
3264 }
3265
3266 if (clabel->partitionSizeHi) {
3267 printf("WARNING: total sectors < 32 bits, yet "
3268 "partitionSizeHi set\n"
3269 "WARNING: resetting partitionSizeHi to zero.\n");
3270 clabel->partitionSizeHi = 0;
3271 }
3272 }
3273 }
3274
3275
#ifdef DEBUG
/* Dump a component label to the console (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3306
3307 RF_ConfigSet_t *
3308 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3309 {
3310 RF_AutoConfig_t *ac;
3311 RF_ConfigSet_t *config_sets;
3312 RF_ConfigSet_t *cset;
3313 RF_AutoConfig_t *ac_next;
3314
3315
3316 config_sets = NULL;
3317
3318 /* Go through the AutoConfig list, and figure out which components
3319 belong to what sets. */
3320 ac = ac_list;
3321 while(ac!=NULL) {
3322 /* we're going to putz with ac->next, so save it here
3323 for use at the end of the loop */
3324 ac_next = ac->next;
3325
3326 if (config_sets == NULL) {
3327 /* will need at least this one... */
3328 config_sets = (RF_ConfigSet_t *)
3329 malloc(sizeof(RF_ConfigSet_t),
3330 M_RAIDFRAME, M_NOWAIT);
3331 if (config_sets == NULL) {
3332 panic("rf_create_auto_sets: No memory!");
3333 }
3334 /* this one is easy :) */
3335 config_sets->ac = ac;
3336 config_sets->next = NULL;
3337 config_sets->rootable = 0;
3338 ac->next = NULL;
3339 } else {
3340 /* which set does this component fit into? */
3341 cset = config_sets;
3342 while(cset!=NULL) {
3343 if (rf_does_it_fit(cset, ac)) {
3344 /* looks like it matches... */
3345 ac->next = cset->ac;
3346 cset->ac = ac;
3347 break;
3348 }
3349 cset = cset->next;
3350 }
3351 if (cset==NULL) {
3352 /* didn't find a match above... new set..*/
3353 cset = (RF_ConfigSet_t *)
3354 malloc(sizeof(RF_ConfigSet_t),
3355 M_RAIDFRAME, M_NOWAIT);
3356 if (cset == NULL) {
3357 panic("rf_create_auto_sets: No memory!");
3358 }
3359 cset->ac = ac;
3360 ac->next = NULL;
3361 cset->next = config_sets;
3362 cset->rootable = 0;
3363 config_sets = cset;
3364 }
3365 }
3366 ac = ac_next;
3367 }
3368
3369
3370 return(config_sets);
3371 }
3372
3373 static int
3374 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3375 {
3376 RF_ComponentLabel_t *clabel1, *clabel2;
3377
3378 /* If this one matches the *first* one in the set, that's good
3379 enough, since the other members of the set would have been
3380 through here too... */
3381 /* note that we are not checking partitionSize here..
3382
3383 Note that we are also not checking the mod_counters here.
3384 If everything else matches execpt the mod_counter, that's
3385 good enough for this test. We will deal with the mod_counters
3386 a little later in the autoconfiguration process.
3387
3388 (clabel1->mod_counter == clabel2->mod_counter) &&
3389
3390 The reason we don't check for this is that failed disks
3391 will have lower modification counts. If those disks are
3392 not added to the set they used to belong to, then they will
3393 form their own set, which may result in 2 different sets,
3394 for example, competing to be configured at raid0, and
3395 perhaps competing to be the root filesystem set. If the
3396 wrong ones get configured, or both attempt to become /,
3397 weird behaviour and or serious lossage will occur. Thus we
3398 need to bring them into the fold here, and kick them out at
3399 a later point.
3400
3401 */
3402
3403 clabel1 = cset->ac->clabel;
3404 clabel2 = ac->clabel;
3405 if ((clabel1->version == clabel2->version) &&
3406 (clabel1->serial_number == clabel2->serial_number) &&
3407 (clabel1->num_rows == clabel2->num_rows) &&
3408 (clabel1->num_columns == clabel2->num_columns) &&
3409 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3410 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3411 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3412 (clabel1->parityConfig == clabel2->parityConfig) &&
3413 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3414 (clabel1->blockSize == clabel2->blockSize) &&
3415 rf_component_label_numblocks(clabel1) ==
3416 rf_component_label_numblocks(clabel2) &&
3417 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3418 (clabel1->root_partition == clabel2->root_partition) &&
3419 (clabel1->last_unit == clabel2->last_unit) &&
3420 (clabel1->config_order == clabel2->config_order)) {
3421 /* if it get's here, it almost *has* to be a match */
3422 } else {
3423 /* it's not consistent with somebody in the set..
3424 punt */
3425 return(0);
3426 }
3427 /* all was fine.. it must fit... */
3428 return(1);
3429 }
3430
/*
 * Decide whether the set has enough live components to be configured.
 * Returns 1 if the set is viable, 0 if too many components are missing.
 *
 * A component counts as "present" for a column only if its mod_counter
 * equals the highest mod_counter seen in the set; stale (failed) disks
 * carry lower counters and are treated as missing here.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum over all members. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for an up-to-date component. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a RAID 1
						   mirror pair are gone --
						   the set is unusable. */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a RAID 1
			   pair without bailing out above, so this
			   pair is usable.  Reset the flag and go on
			   to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3533
3534 void
3535 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3536 RF_Raid_t *raidPtr)
3537 {
3538 RF_ComponentLabel_t *clabel;
3539 int i;
3540
3541 clabel = ac->clabel;
3542
3543 /* 1. Fill in the common stuff */
3544 config->numRow = clabel->num_rows = 1;
3545 config->numCol = clabel->num_columns;
3546 config->numSpare = 0; /* XXX should this be set here? */
3547 config->sectPerSU = clabel->sectPerSU;
3548 config->SUsPerPU = clabel->SUsPerPU;
3549 config->SUsPerRU = clabel->SUsPerRU;
3550 config->parityConfig = clabel->parityConfig;
3551 /* XXX... */
3552 strcpy(config->diskQueueType,"fifo");
3553 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3554 config->layoutSpecificSize = 0; /* XXX ?? */
3555
3556 while(ac!=NULL) {
3557 /* row/col values will be in range due to the checks
3558 in reasonable_label() */
3559 strcpy(config->devnames[0][ac->clabel->column],
3560 ac->devname);
3561 ac = ac->next;
3562 }
3563
3564 for(i=0;i<RF_MAXDBGV;i++) {
3565 config->debugVars[i][0] = 0;
3566 }
3567 }
3568
3569 int
3570 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3571 {
3572 RF_ComponentLabel_t *clabel;
3573 int column;
3574 int sparecol;
3575
3576 raidPtr->autoconfigure = new_value;
3577
3578 for(column=0; column<raidPtr->numCol; column++) {
3579 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3580 clabel = raidget_component_label(raidPtr, column);
3581 clabel->autoconfigure = new_value;
3582 raidflush_component_label(raidPtr, column);
3583 }
3584 }
3585 for(column = 0; column < raidPtr->numSpare ; column++) {
3586 sparecol = raidPtr->numCol + column;
3587 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3588 clabel = raidget_component_label(raidPtr, sparecol);
3589 clabel->autoconfigure = new_value;
3590 raidflush_component_label(raidPtr, sparecol);
3591 }
3592 }
3593 return(new_value);
3594 }
3595
3596 int
3597 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3598 {
3599 RF_ComponentLabel_t *clabel;
3600 int column;
3601 int sparecol;
3602
3603 raidPtr->root_partition = new_value;
3604 for(column=0; column<raidPtr->numCol; column++) {
3605 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3606 clabel = raidget_component_label(raidPtr, column);
3607 clabel->root_partition = new_value;
3608 raidflush_component_label(raidPtr, column);
3609 }
3610 }
3611 for(column = 0; column < raidPtr->numSpare ; column++) {
3612 sparecol = raidPtr->numCol + column;
3613 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3614 clabel = raidget_component_label(raidPtr, sparecol);
3615 clabel->root_partition = new_value;
3616 raidflush_component_label(raidPtr, sparecol);
3617 }
3618 }
3619 return(new_value);
3620 }
3621
3622 void
3623 rf_release_all_vps(RF_ConfigSet_t *cset)
3624 {
3625 RF_AutoConfig_t *ac;
3626
3627 ac = cset->ac;
3628 while(ac!=NULL) {
3629 /* Close the vp, and give it back */
3630 if (ac->vp) {
3631 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3632 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3633 vput(ac->vp);
3634 ac->vp = NULL;
3635 }
3636 ac = ac->next;
3637 }
3638 }
3639
3640
3641 void
3642 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3643 {
3644 RF_AutoConfig_t *ac;
3645 RF_AutoConfig_t *next_ac;
3646
3647 ac = cset->ac;
3648 while(ac!=NULL) {
3649 next_ac = ac->next;
3650 /* nuke the label */
3651 free(ac->clabel, M_RAIDFRAME);
3652 /* cleanup the config structure */
3653 free(ac, M_RAIDFRAME);
3654 /* "next.." */
3655 ac = next_ac;
3656 }
3657 /* and, finally, nuke the config set */
3658 free(cset, M_RAIDFRAME);
3659 }
3660
3661
/*
 * Fill in a component label from the current live state of the raid set.
 * Used when (re)writing labels so that all members carry consistent
 * geometry, serial number, and configuration flags.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* Splits the count into the label's low/high block fields. */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3694
/*
 * Autoconfigure one config set: pick a raid unit, build an RF_Config_t
 * from the set's component labels, and configure the set on that unit.
 *
 * On success returns 0 with the chosen unit number stored in *unit;
 * on failure returns non-zero with *unit set to -1.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	  2. Figure out what RAID ID this one is supposed to live at
	  See if we can get the same RAID dev that it was configured
	  on last time..
	*/

	/* Prefer the unit recorded in the label, clamped to a valid ID. */
	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		  Nope... Go looking for an alternative...
		  Start high so we don't immediately use raid0 if that's
		  not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	/* raidID is -1 here iff the search above found no free unit. */
	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3795
3796 void
3797 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3798 {
3799 struct buf *bp;
3800
3801 bp = (struct buf *)desc->bp;
3802 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3803 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3804 }
3805
/*
 * Initialize a RAIDframe memory pool at IPL_BIO: pre-allocate xmin
 * items (also the low-water mark) and cap the pool at xmax items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3815
3816 /*
3817 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3818 * if there is IO pending and if that IO could possibly be done for a
3819 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3820 * otherwise.
3821 *
3822 */
3823
3824 int
3825 rf_buf_queue_check(int raidid)
3826 {
3827 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3828 raidPtrs[raidid]->openings > 0) {
3829 /* there is work to do */
3830 return 0;
3831 }
3832 /* default is nothing to do */
3833 return 1;
3834 }
3835
3836 int
3837 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3838 {
3839 uint64_t numsecs;
3840 unsigned secsize;
3841 int error;
3842
3843 error = getdisksize(vp, &numsecs, &secsize);
3844 if (error == 0) {
3845 diskPtr->blockSize = secsize;
3846 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3847 diskPtr->partitionSize = numsecs;
3848 return 0;
3849 }
3850 return error;
3851 }
3852
/* config(9) match hook: unconditionally report a match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3858
/*
 * config(9) attach hook: intentionally empty -- device state is set up
 * elsewhere in this driver (NOTE(review): presumably raidinit(); confirm).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3864
3865
3866 static int
3867 raid_detach(device_t self, int flags)
3868 {
3869 int error;
3870 struct raid_softc *rs = &raid_softc[device_unit(self)];
3871
3872 if ((error = raidlock(rs)) != 0)
3873 return (error);
3874
3875 error = raid_detach_unlocked(rs);
3876
3877 raidunlock(rs);
3878
3879 return error;
3880 }
3881
/*
 * Publish a synthetic disk geometry for the raid unit via proplib.
 * The geometry is fabricated from the layout: one "track" per data
 * stripe, and 4 * numCol "tracks" per cylinder.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info takes its own reference; drop ours on geom. */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* Swap in the new dictionary and release the old one, if any. */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3911
3912 /*
3913 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3914 * We end up returning whatever error was returned by the first cache flush
3915 * that fails.
3916 */
3917
3918 int
3919 rf_sync_component_caches(RF_Raid_t *raidPtr)
3920 {
3921 int c, sparecol;
3922 int e,error;
3923 int force = 1;
3924
3925 error = 0;
3926 for (c = 0; c < raidPtr->numCol; c++) {
3927 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3928 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3929 &force, FWRITE, NOCRED);
3930 if (e) {
3931 if (e != ENODEV)
3932 printf("raid%d: cache flush to component %s failed.\n",
3933 raidPtr->raidid, raidPtr->Disks[c].devname);
3934 if (error == 0) {
3935 error = e;
3936 }
3937 }
3938 }
3939 }
3940
3941 for( c = 0; c < raidPtr->numSpare ; c++) {
3942 sparecol = raidPtr->numCol + c;
3943 /* Need to ensure that the reconstruct actually completed! */
3944 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3945 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3946 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3947 if (e) {
3948 if (e != ENODEV)
3949 printf("raid%d: cache flush to component %s failed.\n",
3950 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3951 if (error == 0) {
3952 error = e;
3953 }
3954 }
3955 }
3956 }
3957 return error;
3958 }
3959