rf_netbsdkintf.c revision 1.357 1 /* $NetBSD: rf_netbsdkintf.c,v 1.357 2019/01/08 07:18:18 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.357 2019/01/08 07:18:18 mrg Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #ifdef COMPAT_50
153 #include "rf_compat50.h"
154 #endif
155
156 #ifdef COMPAT_80
157 #include "rf_compat80.h"
158 #endif
159
160 #ifdef COMPAT_NETBSD32
161 #include "rf_compat32.h"
162 #endif
163
164 #include "ioconf.h"
165
166 #ifdef DEBUG
167 int rf_kdebug_level = 0;
168 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
169 #else /* DEBUG */
170 #define db1_printf(a) { }
171 #endif /* DEBUG */
172
173 #ifdef DEBUG_ROOT
174 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
175 #else
176 #define DPRINTF(a, ...)
177 #endif
178
179 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
180 static rf_declare_mutex2(rf_sparet_wait_mutex);
181 static rf_declare_cond2(rf_sparet_wait_cv);
182 static rf_declare_cond2(rf_sparet_resp_cv);
183
184 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
185 * spare table */
186 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
187 * installation process */
188 #endif
189
190 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
191
192 /* prototypes */
193 static void KernelWakeupFunc(struct buf *);
194 static void InitBP(struct buf *, struct vnode *, unsigned,
195 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
196 void *, int, struct proc *);
197 struct raid_softc;
198 static void raidinit(struct raid_softc *);
199 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
200 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
201
202 static int raid_match(device_t, cfdata_t, void *);
203 static void raid_attach(device_t, device_t, void *);
204 static int raid_detach(device_t, int);
205
206 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
207 daddr_t, daddr_t);
208 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
209 daddr_t, daddr_t, int);
210
211 static int raidwrite_component_label(unsigned,
212 dev_t, struct vnode *, RF_ComponentLabel_t *);
213 static int raidread_component_label(unsigned,
214 dev_t, struct vnode *, RF_ComponentLabel_t *);
215
216 static int raid_diskstart(device_t, struct buf *bp);
217 static int raid_dumpblocks(device_t, void *, daddr_t, int);
218 static int raid_lastclose(device_t);
219
220 static dev_type_open(raidopen);
221 static dev_type_close(raidclose);
222 static dev_type_read(raidread);
223 static dev_type_write(raidwrite);
224 static dev_type_ioctl(raidioctl);
225 static dev_type_strategy(raidstrategy);
226 static dev_type_dump(raiddump);
227 static dev_type_size(raidsize);
228
/* Block-device switch: entry points for the raidN block device nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
239
/* Character-device switch: raw (rraidN) device entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
254
/* Driver hooks handed to the generic dk(4) disk framework. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
264
/* Per-unit driver state; one instance per raidN device. */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk framework state */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* RAIDframe state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global "raids" list */
};
277 /* sc_flags */
278 #define RAIDF_INITED 0x01 /* unit has been initialized */
279 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
280 #define RAIDF_DETACH 0x04 /* detach after final close */
281 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
282 #define RAIDF_LOCKED 0x10 /* unit is locked */
283 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
284
285 #define raidunit(x) DISKUNIT(x)
286 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
287
288 extern struct cfdriver raid_cd;
289 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
290 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
291 DVF_DETACH_SHUTDOWN);
292
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request applies to */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void *raidPtr;			/* presumably an RF_Raid_t * — kept
					 * as void * for the thread argument */
};
299
300 /*
301 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
302 * Be aware that large numbers can allow the driver to consume a lot of
303 * kernel memory, especially on writes, and in degraded mode reads.
304 *
305 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
306 * a single 64K write will typically require 64K for the old data,
307 * 64K for the old parity, and 64K for the new parity, for a total
308 * of 192K (if the parity buffer is not re-used immediately).
309 * Even it if is used immediately, that's still 128K, which when multiplied
310 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
311 *
312 * Now in degraded mode, for example, a 64K read on the above setup may
313 * require data reconstruction, which will require *all* of the 4 remaining
314 * disks to participate -- 4 * 32K/disk == 128K again.
315 */
316
317 #ifndef RAIDOUTSTANDING
318 #define RAIDOUTSTANDING 6
319 #endif
320
321 #define RAIDLABELDEV(dev) \
322 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
323
324 /* declared here, and made public, for the benefit of KVM stuff.. */
325
326 static int raidlock(struct raid_softc *);
327 static void raidunlock(struct raid_softc *);
328
329 static int raid_detach_unlocked(struct raid_softc *);
330
331 static void rf_markalldirty(RF_Raid_t *);
332 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
333
334 void rf_ReconThread(struct rf_recon_req_internal *);
335 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
336 void rf_CopybackThread(RF_Raid_t *raidPtr);
337 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
338 int rf_autoconfig(device_t);
339 void rf_buildroothack(RF_ConfigSet_t *);
340
341 RF_AutoConfig_t *rf_find_raid_components(void);
342 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
343 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
344 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
345 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
346 int rf_set_autoconfig(RF_Raid_t *, int);
347 int rf_set_rootpartition(RF_Raid_t *, int);
348 void rf_release_all_vps(RF_ConfigSet_t *);
349 void rf_cleanup_config_set(RF_ConfigSet_t *);
350 int rf_have_enough_components(RF_ConfigSet_t *);
351 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
352 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
353
354 /*
355 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
356 * Note that this is overridden by having RAID_AUTOCONFIG as an option
357 * in the kernel config file.
358 */
359 #ifdef RAID_AUTOCONFIG
360 int raidautoconfig = 1;
361 #else
362 int raidautoconfig = 0;
363 #endif
364 static bool raidautoconfigdone = false;
365
366 struct RF_Pools_s rf_pools;
367
368 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
369 static kmutex_t raid_lock;
370
371 static struct raid_softc *
372 raidcreate(int unit) {
373 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
374 sc->sc_unit = unit;
375 cv_init(&sc->sc_cv, "raidunit");
376 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
377 return sc;
378 }
379
380 static void
381 raiddestroy(struct raid_softc *sc) {
382 cv_destroy(&sc->sc_cv);
383 mutex_destroy(&sc->sc_mutex);
384 kmem_free(sc, sizeof(*sc));
385 }
386
387 static struct raid_softc *
388 raidget(int unit, bool create) {
389 struct raid_softc *sc;
390 if (unit < 0) {
391 #ifdef DIAGNOSTIC
392 panic("%s: unit %d!", __func__, unit);
393 #endif
394 return NULL;
395 }
396 mutex_enter(&raid_lock);
397 LIST_FOREACH(sc, &raids, sc_link) {
398 if (sc->sc_unit == unit) {
399 mutex_exit(&raid_lock);
400 return sc;
401 }
402 }
403 mutex_exit(&raid_lock);
404 if (!create)
405 return NULL;
406 if ((sc = raidcreate(unit)) == NULL)
407 return NULL;
408 mutex_enter(&raid_lock);
409 LIST_INSERT_HEAD(&raids, sc, sc_link);
410 mutex_exit(&raid_lock);
411 return sc;
412 }
413
/* Unlink a softc from the global list and free it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
421
/*
 * Pseudo-device attach entry point.  Intentionally empty; the "num"
 * argument is unused.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
431
432 int
433 rf_autoconfig(device_t self)
434 {
435 RF_AutoConfig_t *ac_list;
436 RF_ConfigSet_t *config_sets;
437
438 if (!raidautoconfig || raidautoconfigdone == true)
439 return (0);
440
441 /* XXX This code can only be run once. */
442 raidautoconfigdone = true;
443
444 #ifdef __HAVE_CPU_BOOTCONF
445 /*
446 * 0. find the boot device if needed first so we can use it later
447 * this needs to be done before we autoconfigure any raid sets,
448 * because if we use wedges we are not going to be able to open
449 * the boot device later
450 */
451 if (booted_device == NULL)
452 cpu_bootconf();
453 #endif
454 /* 1. locate all RAID components on the system */
455 aprint_debug("Searching for RAID components...\n");
456 ac_list = rf_find_raid_components();
457
458 /* 2. Sort them into their respective sets. */
459 config_sets = rf_create_auto_sets(ac_list);
460
461 /*
462 * 3. Evaluate each set and configure the valid ones.
463 * This gets done in rf_buildroothack().
464 */
465 rf_buildroothack(config_sets);
466
467 return 1;
468 }
469
470 static int
471 rf_containsboot(RF_Raid_t *r, device_t bdv) {
472 const char *bootname = device_xname(bdv);
473 size_t len = strlen(bootname);
474
475 for (int col = 0; col < r->numCol; col++) {
476 const char *devname = r->Disks[col].devname;
477 devname += sizeof("/dev/") - 1;
478 if (strncmp(devname, "dk", 2) == 0) {
479 const char *parent =
480 dkwedge_get_parent_name(r->Disks[col].dev);
481 if (parent != NULL)
482 devname = parent;
483 }
484 if (strncmp(devname, bootname, len) == 0) {
485 struct raid_softc *sc = r->softc;
486 aprint_debug("raid%d includes boot device %s\n",
487 sc->sc_unit, devname);
488 return 1;
489 }
490 }
491 return 0;
492 }
493
494 void
495 rf_buildroothack(RF_ConfigSet_t *config_sets)
496 {
497 RF_ConfigSet_t *cset;
498 RF_ConfigSet_t *next_cset;
499 int num_root;
500 struct raid_softc *sc, *rsc;
501 struct dk_softc *dksc;
502
503 sc = rsc = NULL;
504 num_root = 0;
505 cset = config_sets;
506 while (cset != NULL) {
507 next_cset = cset->next;
508 if (rf_have_enough_components(cset) &&
509 cset->ac->clabel->autoconfigure == 1) {
510 sc = rf_auto_config_set(cset);
511 if (sc != NULL) {
512 aprint_debug("raid%d: configured ok\n",
513 sc->sc_unit);
514 if (cset->rootable) {
515 rsc = sc;
516 num_root++;
517 }
518 } else {
519 /* The autoconfig didn't work :( */
520 aprint_debug("Autoconfig failed\n");
521 rf_release_all_vps(cset);
522 }
523 } else {
524 /* we're not autoconfiguring this set...
525 release the associated resources */
526 rf_release_all_vps(cset);
527 }
528 /* cleanup */
529 rf_cleanup_config_set(cset);
530 cset = next_cset;
531 }
532 dksc = &rsc->sc_dksc;
533
534 /* if the user has specified what the root device should be
535 then we don't touch booted_device or boothowto... */
536
537 if (rootspec != NULL)
538 return;
539
540 /* we found something bootable... */
541
542 /*
543 * XXX: The following code assumes that the root raid
544 * is the first ('a') partition. This is about the best
545 * we can do with a BSD disklabel, but we might be able
546 * to do better with a GPT label, by setting a specified
547 * attribute to indicate the root partition. We can then
548 * stash the partition number in the r->root_partition
549 * high bits (the bottom 2 bits are already used). For
550 * now we just set booted_partition to 0 when we override
551 * root.
552 */
553 if (num_root == 1) {
554 device_t candidate_root;
555 if (dksc->sc_dkdev.dk_nwedges != 0) {
556 char cname[sizeof(cset->ac->devname)];
557 /* XXX: assume partition 'a' first */
558 snprintf(cname, sizeof(cname), "%s%c",
559 device_xname(dksc->sc_dev), 'a');
560 candidate_root = dkwedge_find_by_wname(cname);
561 DPRINTF("%s: candidate wedge root=%s\n", __func__,
562 cname);
563 if (candidate_root == NULL) {
564 /*
565 * If that is not found, because we don't use
566 * disklabel, return the first dk child
567 * XXX: we can skip the 'a' check above
568 * and always do this...
569 */
570 size_t i = 0;
571 candidate_root = dkwedge_find_by_parent(
572 device_xname(dksc->sc_dev), &i);
573 }
574 DPRINTF("%s: candidate wedge root=%p\n", __func__,
575 candidate_root);
576 } else
577 candidate_root = dksc->sc_dev;
578 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
579 DPRINTF("%s: booted_device=%p root_partition=%d "
580 "contains_boot=%d\n", __func__, booted_device,
581 rsc->sc_r.root_partition,
582 rf_containsboot(&rsc->sc_r, booted_device));
583 if (booted_device == NULL ||
584 rsc->sc_r.root_partition == 1 ||
585 rf_containsboot(&rsc->sc_r, booted_device)) {
586 booted_device = candidate_root;
587 booted_method = "raidframe/single";
588 booted_partition = 0; /* XXX assume 'a' */
589 }
590 } else if (num_root > 1) {
591 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
592 booted_device);
593
594 /*
595 * Maybe the MD code can help. If it cannot, then
596 * setroot() will discover that we have no
597 * booted_device and will ask the user if nothing was
598 * hardwired in the kernel config file
599 */
600 if (booted_device == NULL)
601 return;
602
603 num_root = 0;
604 mutex_enter(&raid_lock);
605 LIST_FOREACH(sc, &raids, sc_link) {
606 RF_Raid_t *r = &sc->sc_r;
607 if (r->valid == 0)
608 continue;
609
610 if (r->root_partition == 0)
611 continue;
612
613 if (rf_containsboot(r, booted_device)) {
614 num_root++;
615 rsc = sc;
616 dksc = &rsc->sc_dksc;
617 }
618 }
619 mutex_exit(&raid_lock);
620
621 if (num_root == 1) {
622 booted_device = dksc->sc_dev;
623 booted_method = "raidframe/multi";
624 booted_partition = 0; /* XXX assume 'a' */
625 } else {
626 /* we can't guess.. require the user to answer... */
627 boothowto |= RB_ASKNAME;
628 }
629 }
630 }
631
632 static int
633 raidsize(dev_t dev)
634 {
635 struct raid_softc *rs;
636 struct dk_softc *dksc;
637 unsigned int unit;
638
639 unit = raidunit(dev);
640 if ((rs = raidget(unit, false)) == NULL)
641 return -1;
642 dksc = &rs->sc_dksc;
643
644 if ((rs->sc_flags & RAIDF_INITED) == 0)
645 return -1;
646
647 return dk_size(dksc, dev);
648 }
649
650 static int
651 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
652 {
653 unsigned int unit;
654 struct raid_softc *rs;
655 struct dk_softc *dksc;
656
657 unit = raidunit(dev);
658 if ((rs = raidget(unit, false)) == NULL)
659 return ENXIO;
660 dksc = &rs->sc_dksc;
661
662 if ((rs->sc_flags & RAIDF_INITED) == 0)
663 return ENODEV;
664
665 /*
666 Note that blkno is relative to this particular partition.
667 By adding adding RF_PROTECTED_SECTORS, we get a value that
668 is relative to the partition used for the underlying component.
669 */
670 blkno += RF_PROTECTED_SECTORS;
671
672 return dk_dump(dksc, dev, blkno, va, size);
673 }
674
/*
 * Dump "nblk" blocks at "blkno" from "va" to a single live component
 * of a RAID 1 set (the only layout supported for crash dumps).
 * Returns 0 on success, EINVAL for unsupported layouts or when no
 * live component can be found, ENXIO if the target has no bdevsw
 * entry, or a raidlock()/d_dump error.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Dump directly through the chosen component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
780
/*
 * Open entry point (block and character).  Creates the softc for the
 * unit on first reference, marks the components dirty on the first
 * open of a configured set, and hands the rest of the work to
 * dk_open().  Returns ENXIO if the softc cannot be obtained, EBUSY if
 * the unit is shutting down, or a raidlock()/dk_open() error.
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	/* "true": create the softc if it does not exist yet. */
	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	/* Only configured units can actually be opened. */
	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
830
831 static int
832 raid_lastclose(device_t self)
833 {
834 struct raid_softc *rs = raidsoftc(self);
835
836 /* Last one... device is not unconfigured yet.
837 Device shutdown has taken care of setting the
838 clean bits if RAIDF_INITED is not set
839 mark things as clean... */
840
841 rf_update_component_labels(&rs->sc_r,
842 RF_FINAL_COMPONENT_UPDATE);
843
844 /* pass to unlocked code */
845 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
846 rs->sc_flags |= RAIDF_DETACH;
847
848 return 0;
849 }
850
/*
 * Close entry point (block and character).  Lets dk_close() do the
 * actual close for a configured unit, then performs any deferred
 * detach (RAIDF_DETACH, possibly set during the close — see
 * raid_lastclose()) or frees the softc of an unconfigured unit that
 * is shutting down.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* Detach is deferred until after we drop the lock. */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
890
891 static void
892 raid_wakeup(RF_Raid_t *raidPtr)
893 {
894 rf_lock_mutex2(raidPtr->iodone_lock);
895 rf_signal_cond2(raidPtr->iodone_cv);
896 rf_unlock_mutex2(raidPtr->iodone_lock);
897 }
898
899 static void
900 raidstrategy(struct buf *bp)
901 {
902 unsigned int unit;
903 struct raid_softc *rs;
904 struct dk_softc *dksc;
905 RF_Raid_t *raidPtr;
906
907 unit = raidunit(bp->b_dev);
908 if ((rs = raidget(unit, false)) == NULL) {
909 bp->b_error = ENXIO;
910 goto fail;
911 }
912 if ((rs->sc_flags & RAIDF_INITED) == 0) {
913 bp->b_error = ENXIO;
914 goto fail;
915 }
916 dksc = &rs->sc_dksc;
917 raidPtr = &rs->sc_r;
918
919 /* Queue IO only */
920 if (dk_strategy_defer(dksc, bp))
921 goto done;
922
923 /* schedule the IO to happen at the next convenient time */
924 raid_wakeup(raidPtr);
925
926 done:
927 return;
928
929 fail:
930 bp->b_resid = bp->b_bcount;
931 biodone(bp);
932 }
933
934 static int
935 raid_diskstart(device_t dev, struct buf *bp)
936 {
937 struct raid_softc *rs = raidsoftc(dev);
938 RF_Raid_t *raidPtr;
939
940 raidPtr = &rs->sc_r;
941 if (!raidPtr->valid) {
942 db1_printf(("raid is not valid..\n"));
943 return ENODEV;
944 }
945
946 /* XXX */
947 bp->b_resid = 0;
948
949 return raiddoaccess(raidPtr, bp);
950 }
951
952 void
953 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
954 {
955 struct raid_softc *rs;
956 struct dk_softc *dksc;
957
958 rs = raidPtr->softc;
959 dksc = &rs->sc_dksc;
960
961 dk_done(dksc, bp);
962
963 rf_lock_mutex2(raidPtr->mutex);
964 raidPtr->openings++;
965 rf_unlock_mutex2(raidPtr->mutex);
966
967 /* schedule more IO */
968 raid_wakeup(raidPtr);
969 }
970
971 /* ARGSUSED */
972 static int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit, false)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987
988 /* ARGSUSED */
989 static int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 int unit = raidunit(dev);
993 struct raid_softc *rs;
994
995 if ((rs = raidget(unit, false)) == NULL)
996 return ENXIO;
997
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000
1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002
1003 }
1004
/*
 * Unconfigure a RAID set; the caller holds the unit lock.  Refuses
 * with EBUSY while the device is open or reconstruction, parity
 * rewrite, or copyback is in progress.  On success the RAIDframe
 * state is shut down and the disk is detached from the system.
 * Returns 0 (including when the unit was never configured), EBUSY,
 * or an rf_Shutdown() error.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while the unit is busy or background work is running. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* NOTE(review): SHUTDOWN is cleared here before rf_Shutdown();
	 * presumably so a failed shutdown leaves the unit usable —
	 * confirm against callers. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1042
/*
 * raidioctl: ioctl entry point for the raid(4) pseudo-device.
 *
 * Dispatches RAIDFRAME_* control requests (configure/shutdown,
 * component label management, failure/reconstruction/copyback control,
 * parity map tuning, status queries) plus the usual disk ioctls, which
 * fall through to dk_ioctl() at the bottom.
 *
 * Returns 0 on success or an errno.  Note the mixed use of "retcode"
 * and direct returns; the config path funnels through the "config" /
 * "no_config" labels so that compat entry points (COMPAT_50/80) can
 * share the common configuration code after doing their own copyin.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/*
	 * Must be initialized for these...  Commands not listed here
	 * (e.g. CONFIGURE, SHUTDOWN) are allowed on an unconfigured unit.
	 */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
#endif
#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
	case RAIDFRAME_GET_INFO80:
	case RAIDFRAME_GET_COMPONENT_LABEL80:
#endif
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* Compat copyin; shares the common path via "config". */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
		return rf_check_recon_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
		return rf_check_parityrewrite_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
		return rf_check_copyback_status_ext80(raidPtr, data);

	case RAIDFRAME_GET_INFO80:
		return rf_get_info80(raidPtr, data);

	case RAIDFRAME_GET_COMPONENT_LABEL80:
		return rf_get_component_label80(raidPtr, data);

	case RAIDFRAME_CONFIGURE80:
		if ((retcode = rf_config80(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_CONFIGURE32:
#endif
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef COMPAT_NETBSD32
#ifdef _LP64
		/* 32-bit process on a 64-bit kernel: translate the config. */
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			retcode = rf_config_netbsd32(data, k_cfg);
		else
#endif
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/* Common configuration path, also reached from the
		 * COMPAT_50/COMPAT_80 configure cases above. */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse to shut down while the unit is open or any
		 * background operation is still running. */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; returns the (zero) retcode. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check disk state under the array mutex before
		 * kicking off the reconstruction thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The request is heap-allocated because the recon thread
		 * outlives this ioctl; the thread frees it. */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
#ifdef COMPAT_NETBSD32
#ifdef _LP64
			if (cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:
#ifdef COMPAT_80
	case RAIDFRAME_FAIL_DISK80:
#endif

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		/* NOTE(review): thread-creation status in "retcode" is
		 * discarded here (0 is returned unconditionally) -- verify
		 * whether this is intentional. */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* Only allowed when every component is healthy. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1790
1791
1792 /* raidinit -- complete the rest of the initialization for the
1793 RAIDframe device. */
1794
1795
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, hook up the dk(4)/disk(9)
   layers, and mark the unit usable.  Called from the configure path
   after rf_Configure() has succeeded. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device: build a cfdata record by hand and
	 * hand it to autoconf. */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed; the unit is left un-INITED and the
		 * cfdata record is released. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe for wedges (gpt/disklabel partitions) on the new unit. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1851
1852 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1853 /* wake up the daemon & tell it to get us a spare table
1854 * XXX
1855 * the entries in the queues should be tagged with the raidPtr
1856 * so that in the extremely rare case that two recons happen at once,
1857 * we know for which device were requesting a spare table
1858 * XXX
1859 *
1860 * XXX This code is not currently used. GO
1861 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Hand the request to the user-space sparetable daemon: queue it
	 * on the wait queue and wake any daemon blocked in SPARET_WAIT. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* Block until the daemon posts a response on the resp queue.
	 * Note: the response dequeued here is a different RF_SparetWait_t
	 * from the one we queued above. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* "fcol" carries the daemon's status code back to the caller. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1885 #endif
1886
1887 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1888 * bp & passes it down.
1889 * any calls originating in the kernel must use non-blocking I/O
1890 * do some extra sanity checking to return "appropriate" error values for
1891 * certain conditions (to make some standard utilities work)
1892 *
1893 * Formerly known as: rf_DoAccessKernel
1894 */
1895 void
1896 raidstart(RF_Raid_t *raidPtr)
1897 {
1898 struct raid_softc *rs;
1899 struct dk_softc *dksc;
1900
1901 rs = raidPtr->softc;
1902 dksc = &rs->sc_dksc;
1903 /* quick check to see if anything has died recently */
1904 rf_lock_mutex2(raidPtr->mutex);
1905 if (raidPtr->numNewFailures > 0) {
1906 rf_unlock_mutex2(raidPtr->mutex);
1907 rf_update_component_labels(raidPtr,
1908 RF_NORMAL_COMPONENT_UPDATE);
1909 rf_lock_mutex2(raidPtr->mutex);
1910 raidPtr->numNewFailures--;
1911 }
1912 rf_unlock_mutex2(raidPtr->mutex);
1913
1914 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1915 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1916 return;
1917 }
1918
1919 dk_start(dksc, NULL);
1920 }
1921
1922 static int
1923 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1924 {
1925 RF_SectorCount_t num_blocks, pb, sum;
1926 RF_RaidAddr_t raid_addr;
1927 daddr_t blocknum;
1928 int do_async;
1929 int rc;
1930
1931 rf_lock_mutex2(raidPtr->mutex);
1932 if (raidPtr->openings == 0) {
1933 rf_unlock_mutex2(raidPtr->mutex);
1934 return EAGAIN;
1935 }
1936 rf_unlock_mutex2(raidPtr->mutex);
1937
1938 blocknum = bp->b_rawblkno;
1939
1940 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1941 (int) blocknum));
1942
1943 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1944 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1945
1946 /* *THIS* is where we adjust what block we're going to...
1947 * but DO NOT TOUCH bp->b_blkno!!! */
1948 raid_addr = blocknum;
1949
1950 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1951 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1952 sum = raid_addr + num_blocks + pb;
1953 if (1 || rf_debugKernelAccess) {
1954 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1955 (int) raid_addr, (int) sum, (int) num_blocks,
1956 (int) pb, (int) bp->b_resid));
1957 }
1958 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1959 || (sum < num_blocks) || (sum < pb)) {
1960 rc = ENOSPC;
1961 goto done;
1962 }
1963 /*
1964 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1965 */
1966
1967 if (bp->b_bcount & raidPtr->sectorMask) {
1968 rc = ENOSPC;
1969 goto done;
1970 }
1971 db1_printf(("Calling DoAccess..\n"));
1972
1973
1974 rf_lock_mutex2(raidPtr->mutex);
1975 raidPtr->openings--;
1976 rf_unlock_mutex2(raidPtr->mutex);
1977
1978 /*
1979 * Everything is async.
1980 */
1981 do_async = 1;
1982
1983 /* don't ever condition on bp->b_flags & B_WRITE.
1984 * always condition on B_READ instead */
1985
1986 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1987 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1988 do_async, raid_addr, num_blocks,
1989 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1990
1991 done:
1992 return rc;
1993 }
1994
1995 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1996
1997 int
1998 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1999 {
2000 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2001 struct buf *bp;
2002
2003 req->queue = queue;
2004 bp = req->bp;
2005
2006 switch (req->type) {
2007 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2008 /* XXX need to do something extra here.. */
2009 /* I'm leaving this in, as I've never actually seen it used,
2010 * and I'd like folks to report it... GO */
2011 printf(("WAKEUP CALLED\n"));
2012 queue->numOutstanding++;
2013
2014 bp->b_flags = 0;
2015 bp->b_private = req;
2016
2017 KernelWakeupFunc(bp);
2018 break;
2019
2020 case RF_IO_TYPE_READ:
2021 case RF_IO_TYPE_WRITE:
2022 #if RF_ACC_TRACE > 0
2023 if (req->tracerec) {
2024 RF_ETIMER_START(req->tracerec->timer);
2025 }
2026 #endif
2027 InitBP(bp, queue->rf_cinfo->ci_vp,
2028 op, queue->rf_cinfo->ci_dev,
2029 req->sectorOffset, req->numSector,
2030 req->buf, KernelWakeupFunc, (void *) req,
2031 queue->raidPtr->logBytesPerSector, req->b_proc);
2032
2033 if (rf_debugKernelAccess) {
2034 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2035 (long) bp->b_blkno));
2036 }
2037 queue->numOutstanding++;
2038 queue->last_deq_sector = req->sectorOffset;
2039 /* acc wouldn't have been let in if there were any pending
2040 * reqs at any other priority */
2041 queue->curPriority = req->priority;
2042
2043 db1_printf(("Going for %c to unit %d col %d\n",
2044 req->type, queue->raidPtr->raidid,
2045 queue->col));
2046 db1_printf(("sector %d count %d (%d bytes) %d\n",
2047 (int) req->sectorOffset, (int) req->numSector,
2048 (int) (req->numSector <<
2049 queue->raidPtr->logBytesPerSector),
2050 (int) queue->raidPtr->logBytesPerSector));
2051
2052 /*
2053 * XXX: drop lock here since this can block at
2054 * least with backing SCSI devices. Retake it
2055 * to minimize fuss with calling interfaces.
2056 */
2057
2058 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2059 bdev_strategy(bp);
2060 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2061 break;
2062
2063 default:
2064 panic("bad req->type in rf_DispatchKernelIO");
2065 }
2066 db1_printf(("Exiting from DispatchKernelIO\n"));
2067
2068 return (0);
2069 }
2070 /* this is the callback function associated with a I/O invoked from
2071 kernel code.
2072 */
/* this is the callback function associated with an I/O invoked from
   kernel code.  Runs in biodone context: records the error, possibly
   marks the component failed, and hands the request to the raidio
   thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP()
	 * (or by the NOP path in rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update in
			 * raidstart(). */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2139
2140
2141 /*
2142 * initialize a buf structure for doing an I/O in the kernel.
2143 */
2144 static void
2145 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2146 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2147 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2148 struct proc *b_proc)
2149 {
2150 /* bp->b_flags = B_PHYS | rw_flag; */
2151 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2152 bp->b_oflags = 0;
2153 bp->b_cflags = 0;
2154 bp->b_bcount = numSect << logBytesPerSector;
2155 bp->b_bufsize = bp->b_bcount;
2156 bp->b_error = 0;
2157 bp->b_dev = dev;
2158 bp->b_data = bf;
2159 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2160 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2161 if (bp->b_bcount == 0) {
2162 panic("bp->b_bcount is zero in InitBP!!");
2163 }
2164 bp->b_proc = b_proc;
2165 bp->b_iodone = cbFunc;
2166 bp->b_private = cbArg;
2167 }
2168
2169 /*
2170 * Wait interruptibly for an exclusive lock.
2171 *
2172 * XXX
2173 * Several drivers do this; it should be abstracted and made MP-safe.
2174 * (Hmm... where have we seen this warning before :-> GO )
2175 */
2176 static int
2177 raidlock(struct raid_softc *rs)
2178 {
2179 int error;
2180
2181 error = 0;
2182 mutex_enter(&rs->sc_mutex);
2183 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2184 rs->sc_flags |= RAIDF_WANTED;
2185 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2186 if (error != 0)
2187 goto done;
2188 }
2189 rs->sc_flags |= RAIDF_LOCKED;
2190 done:
2191 mutex_exit(&rs->sc_mutex);
2192 return (error);
2193 }
2194 /*
2195 * Unlock and wake up any waiters.
2196 */
2197 static void
2198 raidunlock(struct raid_softc *rs)
2199 {
2200
2201 mutex_enter(&rs->sc_mutex);
2202 rs->sc_flags &= ~RAIDF_LOCKED;
2203 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2204 rs->sc_flags &= ~RAIDF_WANTED;
2205 cv_broadcast(&rs->sc_cv);
2206 }
2207 mutex_exit(&rs->sc_mutex);
2208 }
2209
2210
2211 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2212 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2213 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2214
/*
 * Byte offset of the component-info (label) area on each component.
 * Constant today, but kept as a function so the layout can change.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2221
2222 static daddr_t
2223 rf_component_info_size(unsigned secsize)
2224 {
2225 daddr_t info_size;
2226
2227 KASSERT(secsize);
2228 if (secsize > RF_COMPONENT_INFO_SIZE)
2229 info_size = secsize;
2230 else
2231 info_size = RF_COMPONENT_INFO_SIZE;
2232
2233 return info_size;
2234 }
2235
2236 static daddr_t
2237 rf_parity_map_offset(RF_Raid_t *raidPtr)
2238 {
2239 daddr_t map_offset;
2240
2241 KASSERT(raidPtr->bytesPerSector);
2242 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2243 map_offset = raidPtr->bytesPerSector;
2244 else
2245 map_offset = RF_COMPONENT_INFO_SIZE;
2246 map_offset += rf_component_info_offset();
2247
2248 return map_offset;
2249 }
2250
2251 static daddr_t
2252 rf_parity_map_size(RF_Raid_t *raidPtr)
2253 {
2254 daddr_t map_size;
2255
2256 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2257 map_size = raidPtr->bytesPerSector;
2258 else
2259 map_size = RF_PARITY_MAP_SIZE;
2260
2261 return map_size;
2262 }
2263
2264 int
2265 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2266 {
2267 RF_ComponentLabel_t *clabel;
2268
2269 clabel = raidget_component_label(raidPtr, col);
2270 clabel->clean = RF_RAID_CLEAN;
2271 raidflush_component_label(raidPtr, col);
2272 return(0);
2273 }
2274
2275
2276 int
2277 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2278 {
2279 RF_ComponentLabel_t *clabel;
2280
2281 clabel = raidget_component_label(raidPtr, col);
2282 clabel->clean = RF_RAID_DIRTY;
2283 raidflush_component_label(raidPtr, col);
2284 return(0);
2285 }
2286
/*
 * Read the on-disk component label of column "col" into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns 0 or an error from the
 * underlying read.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2296
/*
 * Return a pointer to the in-core component label for column "col".
 * The caller may modify it and then write it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2302
/*
 * Write the in-core component label of column "col" to disk, first
 * stamping it with the set's current mod_counter (and, when parity
 * maps are enabled, keeping the parity-map mod counter in sync).
 * Returns 0 or an error from the underlying write.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2317
2318
/*
 * Read a component label from the reserved component-info area of the
 * given device into "clabel".  "secsize" is the component's sector
 * size, used to compute the size of the on-disk info area.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2328
/* ARGSUSED */
/*
 * Read "dsize" bytes from byte offset "offset" of the component's
 * reserved area and copy the first "msize" bytes into "data".
 * Returns 0 on success, or an errno value (EINVAL if the component
 * has no backing vnode, otherwise the I/O error from biowait()).
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* Issue the read synchronously and wait for completion. */
	bdev_strategy(bp);
	error = biowait(bp);

	/* Only copy out on success; "data" is untouched on error. */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2366
2367
/*
 * Write "clabel" synchronously into the reserved component-info area
 * of the given device.  "secsize" is the component's sector size,
 * used to compute the size of the on-disk info area.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2377
2378 /* ARGSUSED */
2379 static int
2380 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2381 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2382 {
2383 struct buf *bp;
2384 int error;
2385
2386 /* get a block of the appropriate size... */
2387 bp = geteblk((int)dsize);
2388 bp->b_dev = dev;
2389
2390 /* get our ducks in a row for the write */
2391 bp->b_blkno = offset / DEV_BSIZE;
2392 bp->b_bcount = dsize;
2393 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2394 bp->b_resid = dsize;
2395
2396 memset(bp->b_data, 0, dsize);
2397 memcpy(bp->b_data, data, msize);
2398
2399 bdev_strategy(bp);
2400 if (asyncp)
2401 return 0;
2402 error = biowait(bp);
2403 brelse(bp, 0);
2404 if (error) {
2405 #if 1
2406 printf("Failed to write RAID component info!\n");
2407 #endif
2408 }
2409
2410 return(error);
2411 }
2412
/*
 * Write the in-core parity map "map" to the parity-map area of every
 * live component of the set.  Dead components are skipped.
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2430
2431 void
2432 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2433 {
2434 struct rf_paritymap_ondisk tmp;
2435 int c,first;
2436
2437 first=1;
2438 for (c = 0; c < raidPtr->numCol; c++) {
2439 /* Skip dead disks. */
2440 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2441 continue;
2442 raidread_component_area(raidPtr->Disks[c].dev,
2443 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2444 RF_PARITYMAP_NBYTE,
2445 rf_parity_map_offset(raidPtr),
2446 rf_parity_map_size(raidPtr));
2447 if (first) {
2448 memcpy(map, &tmp, sizeof(*map));
2449 first = 0;
2450 } else {
2451 rf_paritymap_merge(map, &tmp);
2452 }
2453 }
2454 }
2455
/*
 * Bump the set's modification counter and mark the component label of
 * every live component (and every in-use spare) dirty on disk, so a
 * subsequent unclean shutdown can be detected.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column records this spare in
			   spareCol, scol keeps its prior value (initially -1)
			   and is stored into clabel->column below --
			   presumably a used spare always has an owner;
			   verify. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2515
2516
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the mod counter, record the configured unit, flush the
 * labels, and -- on a final update with good parity -- mark them clean.
 * "final" is RF_FINAL_COMPONENT_UPDATE at shutdown/detach time.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			/* Only claim "clean" when parity is known good. */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2594
2595 void
2596 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2597 {
2598
2599 if (vp != NULL) {
2600 if (auto_configured == 1) {
2601 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2602 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2603 vput(vp);
2604
2605 } else {
2606 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2607 }
2608 }
2609 }
2610
2611
2612 void
2613 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2614 {
2615 int r,c;
2616 struct vnode *vp;
2617 int acd;
2618
2619
2620 /* We take this opportunity to close the vnodes like we should.. */
2621
2622 for (c = 0; c < raidPtr->numCol; c++) {
2623 vp = raidPtr->raid_cinfo[c].ci_vp;
2624 acd = raidPtr->Disks[c].auto_configured;
2625 rf_close_component(raidPtr, vp, acd);
2626 raidPtr->raid_cinfo[c].ci_vp = NULL;
2627 raidPtr->Disks[c].auto_configured = 0;
2628 }
2629
2630 for (r = 0; r < raidPtr->numSpare; r++) {
2631 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2632 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2633 rf_close_component(raidPtr, vp, acd);
2634 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2635 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2636 }
2637 }
2638
2639
/*
 * Kernel-thread entry point: fail the requested component (optionally
 * kicking off reconstruction, per RF_FDFLAGS_RECON), then exit.
 * Frees "req" before returning; never returns to the caller.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* Fail the disk; second arg selects whether to reconstruct. */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2661
/*
 * Kernel-thread entry point: rewrite all parity for the set.  On
 * success the set is flagged parity-clean; on failure the error is
 * logged.  Wakes anyone blocked in shutdown waiting for the rewrite.
 * Never returns to the caller.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2694
2695
/*
 * Kernel-thread entry point: copy reconstructed data from spares back
 * to replaced components, then exit.  Never returns to the caller.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2710
2711
/*
 * Kernel-thread entry point: reconstruct the requested column in
 * place (onto the same device).  Frees "req"; never returns to the
 * caller.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2729
/*
 * Probe one candidate device for a RAIDframe component label.  If a
 * reasonable label is found, prepend a new RF_AutoConfig_t to
 * "ac_list" (taking ownership of "vp"); otherwise close/release "vp".
 * Returns the (possibly updated) list head, or NULL if memory ran out
 * -- in which case the entire list has been freed.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything collected so far. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so release the vnode too. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2787
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels and return a list of candidates for auto-configuration.
 * The scan runs twice: wedges ("dk") first, then everything else, so
 * that a wedge covering a whole disk wins over that disk's raw
 * partition.  For non-wedge disks, each FS_RAID disklabel partition
 * is probed; if none is found, the raw partition is probed as a last
 * resort.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: probe the wedge itself if it is
				   typed as a RAIDframe partition. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes ownership of vp. */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2991
2992
2993 int
2994 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2995 {
2996
2997 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2998 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2999 ((clabel->clean == RF_RAID_CLEAN) ||
3000 (clabel->clean == RF_RAID_DIRTY)) &&
3001 clabel->row >=0 &&
3002 clabel->column >= 0 &&
3003 clabel->num_rows > 0 &&
3004 clabel->num_columns > 0 &&
3005 clabel->row < clabel->num_rows &&
3006 clabel->column < clabel->num_columns &&
3007 clabel->blockSize > 0 &&
3008 /*
3009 * numBlocksHi may contain garbage, but it is ok since
3010 * the type is unsigned. If it is really garbage,
3011 * rf_fix_old_label_size() will fix it.
3012 */
3013 rf_component_label_numblocks(clabel) > 0) {
3014 /*
3015 * label looks reasonable enough...
3016 * let's make sure it has no old garbage.
3017 */
3018 if (numsecs)
3019 rf_fix_old_label_size(clabel, numsecs);
3020 return(1);
3021 }
3022 return(0);
3023 }
3024
3025
3026 /*
3027 * For reasons yet unknown, some old component labels have garbage in
3028 * the newer numBlocksHi region, and this causes lossage. Since those
3029 * disks will also have numsecs set to less than 32 bits of sectors,
3030 * we can determine when this corruption has occurred, and fix it.
3031 *
3032 * The exact same problem, with the same unknown reason, happens to
3033 * the partitionSizeHi member as well.
3034 */
3035 static void
3036 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3037 {
3038
3039 if (numsecs < ((uint64_t)1 << 32)) {
3040 if (clabel->numBlocksHi) {
3041 printf("WARNING: total sectors < 32 bits, yet "
3042 "numBlocksHi set\n"
3043 "WARNING: resetting numBlocksHi to zero.\n");
3044 clabel->numBlocksHi = 0;
3045 }
3046
3047 if (clabel->partitionSizeHi) {
3048 printf("WARNING: total sectors < 32 bits, yet "
3049 "partitionSizeHi set\n"
3050 "WARNING: resetting partitionSizeHi to zero.\n");
3051 clabel->partitionSizeHi = 0;
3052 }
3053 }
3054 }
3055
3056
3057 #ifdef DEBUG
/*
 * DEBUG helper: dump the interesting fields of a component label to
 * the console in human-readable form.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* root_partition is masked to 2 bits below; index 3 is invalid. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
3089 #endif
3090
3091 RF_ConfigSet_t *
3092 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3093 {
3094 RF_AutoConfig_t *ac;
3095 RF_ConfigSet_t *config_sets;
3096 RF_ConfigSet_t *cset;
3097 RF_AutoConfig_t *ac_next;
3098
3099
3100 config_sets = NULL;
3101
3102 /* Go through the AutoConfig list, and figure out which components
3103 belong to what sets. */
3104 ac = ac_list;
3105 while(ac!=NULL) {
3106 /* we're going to putz with ac->next, so save it here
3107 for use at the end of the loop */
3108 ac_next = ac->next;
3109
3110 if (config_sets == NULL) {
3111 /* will need at least this one... */
3112 config_sets = (RF_ConfigSet_t *)
3113 malloc(sizeof(RF_ConfigSet_t),
3114 M_RAIDFRAME, M_NOWAIT);
3115 if (config_sets == NULL) {
3116 panic("rf_create_auto_sets: No memory!");
3117 }
3118 /* this one is easy :) */
3119 config_sets->ac = ac;
3120 config_sets->next = NULL;
3121 config_sets->rootable = 0;
3122 ac->next = NULL;
3123 } else {
3124 /* which set does this component fit into? */
3125 cset = config_sets;
3126 while(cset!=NULL) {
3127 if (rf_does_it_fit(cset, ac)) {
3128 /* looks like it matches... */
3129 ac->next = cset->ac;
3130 cset->ac = ac;
3131 break;
3132 }
3133 cset = cset->next;
3134 }
3135 if (cset==NULL) {
3136 /* didn't find a match above... new set..*/
3137 cset = (RF_ConfigSet_t *)
3138 malloc(sizeof(RF_ConfigSet_t),
3139 M_RAIDFRAME, M_NOWAIT);
3140 if (cset == NULL) {
3141 panic("rf_create_auto_sets: No memory!");
3142 }
3143 cset->ac = ac;
3144 ac->next = NULL;
3145 cset->next = config_sets;
3146 cset->rootable = 0;
3147 config_sets = cset;
3148 }
3149 }
3150 ac = ac_next;
3151 }
3152
3153
3154 return(config_sets);
3155 }
3156
3157 static int
3158 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3159 {
3160 RF_ComponentLabel_t *clabel1, *clabel2;
3161
3162 /* If this one matches the *first* one in the set, that's good
3163 enough, since the other members of the set would have been
3164 through here too... */
3165 /* note that we are not checking partitionSize here..
3166
3167 Note that we are also not checking the mod_counters here.
3168 If everything else matches except the mod_counter, that's
3169 good enough for this test. We will deal with the mod_counters
3170 a little later in the autoconfiguration process.
3171
3172 (clabel1->mod_counter == clabel2->mod_counter) &&
3173
3174 The reason we don't check for this is that failed disks
3175 will have lower modification counts. If those disks are
3176 not added to the set they used to belong to, then they will
3177 form their own set, which may result in 2 different sets,
3178 for example, competing to be configured at raid0, and
3179 perhaps competing to be the root filesystem set. If the
3180 wrong ones get configured, or both attempt to become /,
3181 weird behaviour and or serious lossage will occur. Thus we
3182 need to bring them into the fold here, and kick them out at
3183 a later point.
3184
3185 */
3186
3187 clabel1 = cset->ac->clabel;
3188 clabel2 = ac->clabel;
3189 if ((clabel1->version == clabel2->version) &&
3190 (clabel1->serial_number == clabel2->serial_number) &&
3191 (clabel1->num_rows == clabel2->num_rows) &&
3192 (clabel1->num_columns == clabel2->num_columns) &&
3193 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3194 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3195 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3196 (clabel1->parityConfig == clabel2->parityConfig) &&
3197 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3198 (clabel1->blockSize == clabel2->blockSize) &&
3199 rf_component_label_numblocks(clabel1) ==
3200 rf_component_label_numblocks(clabel2) &&
3201 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3202 (clabel1->root_partition == clabel2->root_partition) &&
3203 (clabel1->last_unit == clabel2->last_unit) &&
3204 (clabel1->config_order == clabel2->config_order)) {
3205 /* if it get's here, it almost *has* to be a match */
3206 } else {
3207 /* it's not consistent with somebody in the set..
3208 punt */
3209 return(0);
3210 }
3211 /* all was fine.. it must fit... */
3212 return(1);
3213 }
3214
/*
 * Decide whether configuration set "cset" has enough live, up-to-date
 * components to be configured.  A component is counted present only
 * if its mod_counter matches the newest one seen in the set (stale
 * components are treated as missing).  RAID 1 gets special even/odd
 * pairing rules; other levels use a simple missing-count threshold.
 * Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The highest mod_counter among the members wins; components
	   with a lower one are assumed stale (e.g. failed earlier). */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   claiming column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3317
3318 void
3319 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3320 RF_Raid_t *raidPtr)
3321 {
3322 RF_ComponentLabel_t *clabel;
3323 int i;
3324
3325 clabel = ac->clabel;
3326
3327 /* 1. Fill in the common stuff */
3328 config->numCol = clabel->num_columns;
3329 config->numSpare = 0; /* XXX should this be set here? */
3330 config->sectPerSU = clabel->sectPerSU;
3331 config->SUsPerPU = clabel->SUsPerPU;
3332 config->SUsPerRU = clabel->SUsPerRU;
3333 config->parityConfig = clabel->parityConfig;
3334 /* XXX... */
3335 strcpy(config->diskQueueType,"fifo");
3336 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3337 config->layoutSpecificSize = 0; /* XXX ?? */
3338
3339 while(ac!=NULL) {
3340 /* row/col values will be in range due to the checks
3341 in reasonable_label() */
3342 strcpy(config->devnames[0][ac->clabel->column],
3343 ac->devname);
3344 ac = ac->next;
3345 }
3346
3347 for(i=0;i<RF_MAXDBGV;i++) {
3348 config->debugVars[i][0] = 0;
3349 }
3350 }
3351
3352 int
3353 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3354 {
3355 RF_ComponentLabel_t *clabel;
3356 int column;
3357 int sparecol;
3358
3359 raidPtr->autoconfigure = new_value;
3360
3361 for(column=0; column<raidPtr->numCol; column++) {
3362 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3363 clabel = raidget_component_label(raidPtr, column);
3364 clabel->autoconfigure = new_value;
3365 raidflush_component_label(raidPtr, column);
3366 }
3367 }
3368 for(column = 0; column < raidPtr->numSpare ; column++) {
3369 sparecol = raidPtr->numCol + column;
3370 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3371 clabel = raidget_component_label(raidPtr, sparecol);
3372 clabel->autoconfigure = new_value;
3373 raidflush_component_label(raidPtr, sparecol);
3374 }
3375 }
3376 return(new_value);
3377 }
3378
3379 int
3380 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3381 {
3382 RF_ComponentLabel_t *clabel;
3383 int column;
3384 int sparecol;
3385
3386 raidPtr->root_partition = new_value;
3387 for(column=0; column<raidPtr->numCol; column++) {
3388 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3389 clabel = raidget_component_label(raidPtr, column);
3390 clabel->root_partition = new_value;
3391 raidflush_component_label(raidPtr, column);
3392 }
3393 }
3394 for(column = 0; column < raidPtr->numSpare ; column++) {
3395 sparecol = raidPtr->numCol + column;
3396 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3397 clabel = raidget_component_label(raidPtr, sparecol);
3398 clabel->root_partition = new_value;
3399 raidflush_component_label(raidPtr, sparecol);
3400 }
3401 }
3402 return(new_value);
3403 }
3404
3405 void
3406 rf_release_all_vps(RF_ConfigSet_t *cset)
3407 {
3408 RF_AutoConfig_t *ac;
3409
3410 ac = cset->ac;
3411 while(ac!=NULL) {
3412 /* Close the vp, and give it back */
3413 if (ac->vp) {
3414 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3415 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3416 vput(ac->vp);
3417 ac->vp = NULL;
3418 }
3419 ac = ac->next;
3420 }
3421 }
3422
3423
3424 void
3425 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3426 {
3427 RF_AutoConfig_t *ac;
3428 RF_AutoConfig_t *next_ac;
3429
3430 ac = cset->ac;
3431 while(ac!=NULL) {
3432 next_ac = ac->next;
3433 /* nuke the label */
3434 free(ac->clabel, M_RAIDFRAME);
3435 /* cleanup the config structure */
3436 free(ac, M_RAIDFRAME);
3437 /* "next.." */
3438 ac = next_ac;
3439 }
3440 /* and, finally, nuke the config set */
3441 free(cset, M_RAIDFRAME);
3442 }
3443
3444
/*
 * Initialize *clabel from the current state and layout of raidPtr.
 * The caller is responsible for writing the label out to the
 * component(s) afterwards.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* num_rows is hard-coded to 1. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* Layout parameters. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	/* Remember the unit so autoconfig can try the same one next boot. */
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* Record the parity map parameters in the label as well. */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3477
/*
 * Autoconfigure one config set: build an RF_Config_t from the component
 * labels, find a free raid unit (preferring the unit the set was last
 * configured at), and configure it.  Returns the configured softc, or
 * NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	raidID = cset->ac->clabel->last_unit;
	/* Walk forward from last_unit until we find a unit that is not
	   already a configured (valid) raid set. */
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc existed at all for raidID; create one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: give back the softc we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3561
3562 void
3563 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3564 size_t xmin, size_t xmax)
3565 {
3566 int error;
3567
3568 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3569 pool_sethiwat(p, xmax);
3570 if ((error = pool_prime(p, xmin)) != 0)
3571 panic("%s: failed to prime pool: %d", __func__, error);
3572 pool_setlowat(p, xmin);
3573 }
3574
3575 /*
3576 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3577 * to see if there is IO pending and if that IO could possibly be done
3578 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3579 * otherwise.
3580 *
3581 */
3582 int
3583 rf_buf_queue_check(RF_Raid_t *raidPtr)
3584 {
3585 struct raid_softc *rs;
3586 struct dk_softc *dksc;
3587
3588 rs = raidPtr->softc;
3589 dksc = &rs->sc_dksc;
3590
3591 if ((rs->sc_flags & RAIDF_INITED) == 0)
3592 return 1;
3593
3594 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3595 /* there is work to do */
3596 return 0;
3597 }
3598 /* default is nothing to do */
3599 return 1;
3600 }
3601
3602 int
3603 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3604 {
3605 uint64_t numsecs;
3606 unsigned secsize;
3607 int error;
3608
3609 error = getdisksize(vp, &numsecs, &secsize);
3610 if (error == 0) {
3611 diskPtr->blockSize = secsize;
3612 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3613 diskPtr->partitionSize = numsecs;
3614 return 0;
3615 }
3616 return error;
3617 }
3618
3619 static int
3620 raid_match(device_t self, cfdata_t cfdata, void *aux)
3621 {
3622 return 1;
3623 }
3624
static void
raid_attach(device_t parent, device_t self, void *aux)
{
	/* Intentionally empty: no per-instance work at attach time. */
}
3629
3630
3631 static int
3632 raid_detach(device_t self, int flags)
3633 {
3634 int error;
3635 struct raid_softc *rs = raidsoftc(self);
3636
3637 if (rs == NULL)
3638 return ENXIO;
3639
3640 if ((error = raidlock(rs)) != 0)
3641 return (error);
3642
3643 error = raid_detach_unlocked(rs);
3644
3645 raidunlock(rs);
3646
3647 /* XXX raid can be referenced here */
3648
3649 if (error)
3650 return error;
3651
3652 /* Free the softc */
3653 raidput(rs);
3654
3655 return 0;
3656 }
3657
/*
 * Publish a synthetic disk geometry for the raid device based on the
 * set's total size and layout, then hand it to the disk subsystem.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	/* Start clean; any field not set below stays zero. */
	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/* Fake CHS values derived from the stripe layout. */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	/* NOTE(review): dg_ncylinders is left at 0 here; presumably
	   disk_set_info() derives it from the other fields -- confirm. */
	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3673
3674 /*
3675 * Get cache info for all the components (including spares).
3676 * Returns intersection of all the cache flags of all disks, or first
3677 * error if any encountered.
3678 * XXXfua feature flags can change as spares are added - lock down somehow
3679 */
3680 static int
3681 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3682 {
3683 int c;
3684 int error;
3685 int dkwhole = 0, dkpart;
3686
3687 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3688 /*
3689 * Check any non-dead disk, even when currently being
3690 * reconstructed.
3691 */
3692 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3693 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3694 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3695 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3696 if (error) {
3697 if (error != ENODEV) {
3698 printf("raid%d: get cache for component %s failed\n",
3699 raidPtr->raidid,
3700 raidPtr->Disks[c].devname);
3701 }
3702
3703 return error;
3704 }
3705
3706 if (c == 0)
3707 dkwhole = dkpart;
3708 else
3709 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3710 }
3711 }
3712
3713 *data = dkwhole;
3714
3715 return 0;
3716 }
3717
3718 /*
3719 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3720 * We end up returning whatever error was returned by the first cache flush
3721 * that fails.
3722 */
3723
3724 int
3725 rf_sync_component_caches(RF_Raid_t *raidPtr)
3726 {
3727 int c, sparecol;
3728 int e,error;
3729 int force = 1;
3730
3731 error = 0;
3732 for (c = 0; c < raidPtr->numCol; c++) {
3733 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3734 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3735 &force, FWRITE, NOCRED);
3736 if (e) {
3737 if (e != ENODEV)
3738 printf("raid%d: cache flush to component %s failed.\n",
3739 raidPtr->raidid, raidPtr->Disks[c].devname);
3740 if (error == 0) {
3741 error = e;
3742 }
3743 }
3744 }
3745 }
3746
3747 for( c = 0; c < raidPtr->numSpare ; c++) {
3748 sparecol = raidPtr->numCol + c;
3749 /* Need to ensure that the reconstruct actually completed! */
3750 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3751 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3752 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3753 if (e) {
3754 if (e != ENODEV)
3755 printf("raid%d: cache flush to component %s failed.\n",
3756 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3757 if (error == 0) {
3758 error = e;
3759 }
3760 }
3761 }
3762 }
3763 return error;
3764 }
3765
3766 /* Fill in info with the current status */
3767 void
3768 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3769 {
3770
3771 if (raidPtr->status != rf_rs_reconstructing) {
3772 info->total = 100;
3773 info->completed = 100;
3774 } else {
3775 info->total = raidPtr->reconControl->numRUsTotal;
3776 info->completed = raidPtr->reconControl->numRUsComplete;
3777 }
3778 info->remaining = info->total - info->completed;
3779 }
3780
3781 /* Fill in info with the current status */
3782 void
3783 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3784 {
3785
3786 if (raidPtr->parity_rewrite_in_progress == 1) {
3787 info->total = raidPtr->Layout.numStripe;
3788 info->completed = raidPtr->parity_rewrite_stripes_done;
3789 } else {
3790 info->completed = 100;
3791 info->total = 100;
3792 }
3793 info->remaining = info->total - info->completed;
3794 }
3795
3796 /* Fill in info with the current status */
3797 void
3798 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3799 {
3800
3801 if (raidPtr->copyback_in_progress == 1) {
3802 info->total = raidPtr->Layout.numStripe;
3803 info->completed = raidPtr->copyback_stripes_done;
3804 info->remaining = info->total - info->completed;
3805 } else {
3806 info->remaining = 0;
3807 info->completed = 100;
3808 info->total = 100;
3809 }
3810 }
3811
3812 /* Fill in config with the current info */
3813 int
3814 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3815 {
3816 int d, i, j;
3817
3818 if (!raidPtr->valid)
3819 return (ENODEV);
3820 config->cols = raidPtr->numCol;
3821 config->ndevs = raidPtr->numCol;
3822 if (config->ndevs >= RF_MAX_DISKS)
3823 return (ENOMEM);
3824 config->nspares = raidPtr->numSpare;
3825 if (config->nspares >= RF_MAX_DISKS)
3826 return (ENOMEM);
3827 config->maxqdepth = raidPtr->maxQueueDepth;
3828 d = 0;
3829 for (j = 0; j < config->cols; j++) {
3830 config->devs[d] = raidPtr->Disks[j];
3831 d++;
3832 }
3833 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3834 config->spares[i] = raidPtr->Disks[j];
3835 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3836 /* XXX: raidctl(8) expects to see this as a used spare */
3837 config->spares[i].status = rf_ds_used_spare;
3838 }
3839 }
3840 return 0;
3841 }
3842
3843 int
3844 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3845 {
3846 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3847 RF_ComponentLabel_t *raid_clabel;
3848 int column = clabel->column;
3849
3850 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3851 return EINVAL;
3852 raid_clabel = raidget_component_label(raidPtr, column);
3853 memcpy(clabel, raid_clabel, sizeof *clabel);
3854
3855 return 0;
3856 }
3857
3858 /*
3859 * Module interface
3860 */
3861
3862 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3863
3864 #ifdef _MODULE
3865 CFDRIVER_DECL(raid, DV_DISK, NULL);
3866 #endif
3867
3868 static int raid_modcmd(modcmd_t, void *);
3869 static int raid_modcmd_init(void);
3870 static int raid_modcmd_fini(void);
3871
3872 static int
3873 raid_modcmd(modcmd_t cmd, void *data)
3874 {
3875 int error;
3876
3877 error = 0;
3878 switch (cmd) {
3879 case MODULE_CMD_INIT:
3880 error = raid_modcmd_init();
3881 break;
3882 case MODULE_CMD_FINI:
3883 error = raid_modcmd_fini();
3884 break;
3885 default:
3886 error = ENOTTY;
3887 break;
3888 }
3889 return error;
3890 }
3891
/*
 * Module initialization: set up locking, attach the device switch and
 * autoconf glue (rolling back prior steps on failure), boot the
 * RAIDframe engine, and register the autoconfiguration finalizer.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization primitives for spare-table requests. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Attach the block/char devsw; -1 lets the system pick the
	   majors.  EEXIST (already attached) is not fatal. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attachment. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back both earlier attachments. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	if (error == 0) {
		/* Bring up the core RAIDframe engine. */
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Autoconfig won't run, but the driver is still usable. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3962
/*
 * Module teardown: refuse while raid devices exist, then detach the
 * autoconf glue and devsw (re-attaching earlier pieces if a later step
 * fails), shut down the RAIDframe engine, and destroy the locks.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Undo the cfattach detach so the module stays usable. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Undo both earlier detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe engine. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4012