rf_netbsdkintf.c revision 1.361 1 /* $NetBSD: rf_netbsdkintf.c,v 1.361 2019/02/03 08:02:24 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.361 2019/02/03 08:02:24 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #endif
109
110 #include <sys/param.h>
111 #include <sys/errno.h>
112 #include <sys/pool.h>
113 #include <sys/proc.h>
114 #include <sys/queue.h>
115 #include <sys/disk.h>
116 #include <sys/device.h>
117 #include <sys/stat.h>
118 #include <sys/ioctl.h>
119 #include <sys/fcntl.h>
120 #include <sys/systm.h>
121 #include <sys/vnode.h>
122 #include <sys/disklabel.h>
123 #include <sys/conf.h>
124 #include <sys/buf.h>
125 #include <sys/bufq.h>
126 #include <sys/reboot.h>
127 #include <sys/kauth.h>
128 #include <sys/module.h>
129 #include <sys/compat_stub.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #include "rf_compat80.h"
152
153 #ifdef _LP64
154 #ifndef COMPAT_NETBSD32
155 #define COMPAT_NETBSD32
156 #endif
157 #include "rf_compat32.h"
158 #endif
159
160 #include "ioconf.h"
161
162 #ifdef DEBUG
163 int rf_kdebug_level = 0;
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #else /* DEBUG */
166 #define db1_printf(a) { }
167 #endif /* DEBUG */
168
169 #ifdef DEBUG_ROOT
170 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
171 #else
172 #define DPRINTF(a, ...)
173 #endif
174
175 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
176 static rf_declare_mutex2(rf_sparet_wait_mutex);
177 static rf_declare_cond2(rf_sparet_wait_cv);
178 static rf_declare_cond2(rf_sparet_resp_cv);
179
180 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
181 * spare table */
182 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
183 * installation process */
184 #endif
185
186 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
187
188 /* prototypes */
189 static void KernelWakeupFunc(struct buf *);
190 static void InitBP(struct buf *, struct vnode *, unsigned,
191 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
192 void *, int, struct proc *);
193 struct raid_softc;
194 static void raidinit(struct raid_softc *);
195 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
196 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
197
198 static int raid_match(device_t, cfdata_t, void *);
199 static void raid_attach(device_t, device_t, void *);
200 static int raid_detach(device_t, int);
201
202 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
203 daddr_t, daddr_t);
204 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
205 daddr_t, daddr_t, int);
206
207 static int raidwrite_component_label(unsigned,
208 dev_t, struct vnode *, RF_ComponentLabel_t *);
209 static int raidread_component_label(unsigned,
210 dev_t, struct vnode *, RF_ComponentLabel_t *);
211
212 static int raid_diskstart(device_t, struct buf *bp);
213 static int raid_dumpblocks(device_t, void *, daddr_t, int);
214 static int raid_lastclose(device_t);
215
216 static dev_type_open(raidopen);
217 static dev_type_close(raidclose);
218 static dev_type_read(raidread);
219 static dev_type_write(raidwrite);
220 static dev_type_ioctl(raidioctl);
221 static dev_type_strategy(raidstrategy);
222 static dev_type_dump(raiddump);
223 static dev_type_size(raidsize);
224
225 const struct bdevsw raid_bdevsw = {
226 .d_open = raidopen,
227 .d_close = raidclose,
228 .d_strategy = raidstrategy,
229 .d_ioctl = raidioctl,
230 .d_dump = raiddump,
231 .d_psize = raidsize,
232 .d_discard = nodiscard,
233 .d_flag = D_DISK
234 };
235
236 const struct cdevsw raid_cdevsw = {
237 .d_open = raidopen,
238 .d_close = raidclose,
239 .d_read = raidread,
240 .d_write = raidwrite,
241 .d_ioctl = raidioctl,
242 .d_stop = nostop,
243 .d_tty = notty,
244 .d_poll = nopoll,
245 .d_mmap = nommap,
246 .d_kqfilter = nokqfilter,
247 .d_discard = nodiscard,
248 .d_flag = D_DISK
249 };
250
251 static struct dkdriver rf_dkdriver = {
252 .d_open = raidopen,
253 .d_close = raidclose,
254 .d_strategy = raidstrategy,
255 .d_diskstart = raid_diskstart,
256 .d_dumpblocks = raid_dumpblocks,
257 .d_lastclose = raid_lastclose,
258 .d_minphys = minphys
259 };
260
/*
 * Per-unit RAIDframe software state, one per raidN device.
 * Units live on the global `raids' list (linked via sc_link).
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic dk(4) disk state */
	int     sc_unit;		/* unit number (the N in raidN) */
	int     sc_flags;		/* flags (RAIDF_*, below) */
	int     sc_cflags;		/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global `raids' list */
};
273 /* sc_flags */
274 #define RAIDF_INITED 0x01 /* unit has been initialized */
275 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
276 #define RAIDF_DETACH 0x04 /* detach after final close */
277 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
278 #define RAIDF_LOCKED 0x10 /* unit is locked */
279 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
280
281 #define raidunit(x) DISKUNIT(x)
282 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
283
284 extern struct cfdriver raid_cd;
285 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
286 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
287 DVF_DETACH_SHUTDOWN);
288
/*
 * Internal representation of a rf_recon_req, as handed to the
 * reconstruction threads (rf_ReconThread and friends).
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column to operate on */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void   *raidPtr;		/* back-pointer to the array state;
					 * presumably an RF_Raid_t * — see
					 * the recon thread prototypes */
};
295
296 /*
297 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
298 * Be aware that large numbers can allow the driver to consume a lot of
299 * kernel memory, especially on writes, and in degraded mode reads.
300 *
301 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
302 * a single 64K write will typically require 64K for the old data,
303 * 64K for the old parity, and 64K for the new parity, for a total
304 * of 192K (if the parity buffer is not re-used immediately).
305  * Even if it is used immediately, that's still 128K, which when multiplied
306 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
307 *
308 * Now in degraded mode, for example, a 64K read on the above setup may
309 * require data reconstruction, which will require *all* of the 4 remaining
310 * disks to participate -- 4 * 32K/disk == 128K again.
311 */
312
313 #ifndef RAIDOUTSTANDING
314 #define RAIDOUTSTANDING 6
315 #endif
316
317 #define RAIDLABELDEV(dev) \
318 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
319
320 /* declared here, and made public, for the benefit of KVM stuff.. */
321
322 static int raidlock(struct raid_softc *);
323 static void raidunlock(struct raid_softc *);
324
325 static int raid_detach_unlocked(struct raid_softc *);
326
327 static void rf_markalldirty(RF_Raid_t *);
328 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
329
330 void rf_ReconThread(struct rf_recon_req_internal *);
331 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
332 void rf_CopybackThread(RF_Raid_t *raidPtr);
333 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
334 int rf_autoconfig(device_t);
335 void rf_buildroothack(RF_ConfigSet_t *);
336
337 RF_AutoConfig_t *rf_find_raid_components(void);
338 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
339 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
340 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
341 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
342 int rf_set_autoconfig(RF_Raid_t *, int);
343 int rf_set_rootpartition(RF_Raid_t *, int);
344 void rf_release_all_vps(RF_ConfigSet_t *);
345 void rf_cleanup_config_set(RF_ConfigSet_t *);
346 int rf_have_enough_components(RF_ConfigSet_t *);
347 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
348 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
349
350 /*
351 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
352 * Note that this is overridden by having RAID_AUTOCONFIG as an option
353 * in the kernel config file.
354 */
355 #ifdef RAID_AUTOCONFIG
356 int raidautoconfig = 1;
357 #else
358 int raidautoconfig = 0;
359 #endif
360 static bool raidautoconfigdone = false;
361
362 struct RF_Pools_s rf_pools;
363
364 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
365 static kmutex_t raid_lock;
366
367 static struct raid_softc *
368 raidcreate(int unit) {
369 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
370 sc->sc_unit = unit;
371 cv_init(&sc->sc_cv, "raidunit");
372 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
373 return sc;
374 }
375
376 static void
377 raiddestroy(struct raid_softc *sc) {
378 cv_destroy(&sc->sc_cv);
379 mutex_destroy(&sc->sc_mutex);
380 kmem_free(sc, sizeof(*sc));
381 }
382
383 static struct raid_softc *
384 raidget(int unit, bool create) {
385 struct raid_softc *sc;
386 if (unit < 0) {
387 #ifdef DIAGNOSTIC
388 panic("%s: unit %d!", __func__, unit);
389 #endif
390 return NULL;
391 }
392 mutex_enter(&raid_lock);
393 LIST_FOREACH(sc, &raids, sc_link) {
394 if (sc->sc_unit == unit) {
395 mutex_exit(&raid_lock);
396 return sc;
397 }
398 }
399 mutex_exit(&raid_lock);
400 if (!create)
401 return NULL;
402 if ((sc = raidcreate(unit)) == NULL)
403 return NULL;
404 mutex_enter(&raid_lock);
405 LIST_INSERT_HEAD(&raids, sc, sc_link);
406 mutex_exit(&raid_lock);
407 return sc;
408 }
409
410 static void
411 raidput(struct raid_softc *sc) {
412 mutex_enter(&raid_lock);
413 LIST_REMOVE(sc, sc_link);
414 mutex_exit(&raid_lock);
415 raiddestroy(sc);
416 }
417
/*
 * Legacy pseudo-device attach hook; intentionally a no-op.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
427
/*
 * Scan the system for RAID components and kick off autoconfiguration
 * of any complete sets found.  Guarded so it runs at most once per
 * boot; returns 1 when a scan was performed, 0 when autoconfiguration
 * is disabled or has already been done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
465
466 static int
467 rf_containsboot(RF_Raid_t *r, device_t bdv) {
468 const char *bootname;
469 size_t len;
470
471 /* if bdv is NULL, the set can't contain it. exit early. */
472 if (bdv == NULL)
473 return 0;
474
475 bootname = device_xname(bdv);
476 len = strlen(bootname);
477
478 for (int col = 0; col < r->numCol; col++) {
479 const char *devname = r->Disks[col].devname;
480 devname += sizeof("/dev/") - 1;
481 if (strncmp(devname, "dk", 2) == 0) {
482 const char *parent =
483 dkwedge_get_parent_name(r->Disks[col].dev);
484 if (parent != NULL)
485 devname = parent;
486 }
487 if (strncmp(devname, bootname, len) == 0) {
488 struct raid_softc *sc = r->softc;
489 aprint_debug("raid%d includes boot device %s\n",
490 sc->sc_unit, devname);
491 return 1;
492 }
493 }
494 return 0;
495 }
496
497 void
498 rf_buildroothack(RF_ConfigSet_t *config_sets)
499 {
500 RF_ConfigSet_t *cset;
501 RF_ConfigSet_t *next_cset;
502 int num_root;
503 struct raid_softc *sc, *rsc;
504 struct dk_softc *dksc;
505
506 sc = rsc = NULL;
507 num_root = 0;
508 cset = config_sets;
509 while (cset != NULL) {
510 next_cset = cset->next;
511 if (rf_have_enough_components(cset) &&
512 cset->ac->clabel->autoconfigure == 1) {
513 sc = rf_auto_config_set(cset);
514 if (sc != NULL) {
515 aprint_debug("raid%d: configured ok, rootable %d\n",
516 sc->sc_unit, cset->rootable);
517 if (cset->rootable) {
518 rsc = sc;
519 num_root++;
520 }
521 } else {
522 /* The autoconfig didn't work :( */
523 aprint_debug("Autoconfig failed\n");
524 rf_release_all_vps(cset);
525 }
526 } else {
527 /* we're not autoconfiguring this set...
528 release the associated resources */
529 rf_release_all_vps(cset);
530 }
531 /* cleanup */
532 rf_cleanup_config_set(cset);
533 cset = next_cset;
534 }
535 dksc = &rsc->sc_dksc;
536
537 /* if the user has specified what the root device should be
538 then we don't touch booted_device or boothowto... */
539
540 if (rootspec != NULL) {
541 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
542 return;
543 }
544
545 /* we found something bootable... */
546
547 /*
548 * XXX: The following code assumes that the root raid
549 * is the first ('a') partition. This is about the best
550 * we can do with a BSD disklabel, but we might be able
551 * to do better with a GPT label, by setting a specified
552 * attribute to indicate the root partition. We can then
553 * stash the partition number in the r->root_partition
554 * high bits (the bottom 2 bits are already used). For
555 * now we just set booted_partition to 0 when we override
556 * root.
557 */
558 if (num_root == 1) {
559 device_t candidate_root;
560 if (dksc->sc_dkdev.dk_nwedges != 0) {
561 char cname[sizeof(cset->ac->devname)];
562 /* XXX: assume partition 'a' first */
563 snprintf(cname, sizeof(cname), "%s%c",
564 device_xname(dksc->sc_dev), 'a');
565 candidate_root = dkwedge_find_by_wname(cname);
566 DPRINTF("%s: candidate wedge root=%s\n", __func__,
567 cname);
568 if (candidate_root == NULL) {
569 /*
570 * If that is not found, because we don't use
571 * disklabel, return the first dk child
572 * XXX: we can skip the 'a' check above
573 * and always do this...
574 */
575 size_t i = 0;
576 candidate_root = dkwedge_find_by_parent(
577 device_xname(dksc->sc_dev), &i);
578 }
579 DPRINTF("%s: candidate wedge root=%p\n", __func__,
580 candidate_root);
581 } else
582 candidate_root = dksc->sc_dev;
583 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
584 DPRINTF("%s: booted_device=%p root_partition=%d "
585 "contains_boot=%d",
586 __func__, booted_device, rsc->sc_r.root_partition,
587 rf_containsboot(&rsc->sc_r, booted_device));
588 /* XXX the check for booted_device == NULL can probably be
589 * dropped, now that rf_containsboot handles that case.
590 */
591 if (booted_device == NULL ||
592 rsc->sc_r.root_partition == 1 ||
593 rf_containsboot(&rsc->sc_r, booted_device)) {
594 booted_device = candidate_root;
595 booted_method = "raidframe/single";
596 booted_partition = 0; /* XXX assume 'a' */
597 }
598 } else if (num_root > 1) {
599 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
600 booted_device);
601
602 /*
603 * Maybe the MD code can help. If it cannot, then
604 * setroot() will discover that we have no
605 * booted_device and will ask the user if nothing was
606 * hardwired in the kernel config file
607 */
608 if (booted_device == NULL)
609 return;
610
611 num_root = 0;
612 mutex_enter(&raid_lock);
613 LIST_FOREACH(sc, &raids, sc_link) {
614 RF_Raid_t *r = &sc->sc_r;
615 if (r->valid == 0)
616 continue;
617
618 if (r->root_partition == 0)
619 continue;
620
621 if (rf_containsboot(r, booted_device)) {
622 num_root++;
623 rsc = sc;
624 dksc = &rsc->sc_dksc;
625 }
626 }
627 mutex_exit(&raid_lock);
628
629 if (num_root == 1) {
630 booted_device = dksc->sc_dev;
631 booted_method = "raidframe/multi";
632 booted_partition = 0; /* XXX assume 'a' */
633 } else {
634 /* we can't guess.. require the user to answer... */
635 boothowto |= RB_ASKNAME;
636 }
637 }
638 }
639
640 static int
641 raidsize(dev_t dev)
642 {
643 struct raid_softc *rs;
644 struct dk_softc *dksc;
645 unsigned int unit;
646
647 unit = raidunit(dev);
648 if ((rs = raidget(unit, false)) == NULL)
649 return -1;
650 dksc = &rs->sc_dksc;
651
652 if ((rs->sc_flags & RAIDF_INITED) == 0)
653 return -1;
654
655 return dk_size(dksc, dev);
656 }
657
658 static int
659 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
660 {
661 unsigned int unit;
662 struct raid_softc *rs;
663 struct dk_softc *dksc;
664
665 unit = raidunit(dev);
666 if ((rs = raidget(unit, false)) == NULL)
667 return ENXIO;
668 dksc = &rs->sc_dksc;
669
670 if ((rs->sc_flags & RAIDF_INITED) == 0)
671 return ENODEV;
672
673 /*
674 Note that blkno is relative to this particular partition.
675 By adding adding RF_PROTECTED_SECTORS, we get a value that
676 is relative to the partition used for the underlying component.
677 */
678 blkno += RF_PROTECTED_SECTORS;
679
680 return dk_dump(dksc, dev, blkno, va, size);
681 }
682
/*
 * dk(4) dumpblocks callback: write `nblk' blocks at `blkno' directly
 * to one live component of a RAID 1 set (only 1 data + 1 parity column
 * layouts are supported for dumping).  Component preference order is
 * documented in the comment block below.  Called during a crash dump;
 * takes the unit lock around the component selection and the write.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Dump straight to the chosen component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
788
/*
 * Open entry point (block and character).  Creates the softc on first
 * reference (raidget with create == true), refuses units that are
 * shutting down, and — on the first open of an already-configured,
 * otherwise-idle set — marks all components dirty before handing off
 * to dk_open().
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Unit is on its way out: refuse new opens. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
838
839 static int
840 raid_lastclose(device_t self)
841 {
842 struct raid_softc *rs = raidsoftc(self);
843
844 /* Last one... device is not unconfigured yet.
845 Device shutdown has taken care of setting the
846 clean bits if RAIDF_INITED is not set
847 mark things as clean... */
848
849 rf_update_component_labels(&rs->sc_r,
850 RF_FINAL_COMPONENT_UPDATE);
851
852 /* pass to unlocked code */
853 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
854 rs->sc_flags |= RAIDF_DETACH;
855
856 return 0;
857 }
858
/*
 * Close entry point.  Under the unit lock, passes the close to
 * dk_close() when configured.  Afterwards (outside the lock), a
 * deferred detach (RAIDF_DETACH, set by raid_lastclose()) triggers
 * config_detach(); a never-configured unit being shut down has its
 * softc released instead.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
898
899 static void
900 raid_wakeup(RF_Raid_t *raidPtr)
901 {
902 rf_lock_mutex2(raidPtr->iodone_lock);
903 rf_signal_cond2(raidPtr->iodone_cv);
904 rf_unlock_mutex2(raidPtr->iodone_lock);
905 }
906
907 static void
908 raidstrategy(struct buf *bp)
909 {
910 unsigned int unit;
911 struct raid_softc *rs;
912 struct dk_softc *dksc;
913 RF_Raid_t *raidPtr;
914
915 unit = raidunit(bp->b_dev);
916 if ((rs = raidget(unit, false)) == NULL) {
917 bp->b_error = ENXIO;
918 goto fail;
919 }
920 if ((rs->sc_flags & RAIDF_INITED) == 0) {
921 bp->b_error = ENXIO;
922 goto fail;
923 }
924 dksc = &rs->sc_dksc;
925 raidPtr = &rs->sc_r;
926
927 /* Queue IO only */
928 if (dk_strategy_defer(dksc, bp))
929 goto done;
930
931 /* schedule the IO to happen at the next convenient time */
932 raid_wakeup(raidPtr);
933
934 done:
935 return;
936
937 fail:
938 bp->b_resid = bp->b_bcount;
939 biodone(bp);
940 }
941
942 static int
943 raid_diskstart(device_t dev, struct buf *bp)
944 {
945 struct raid_softc *rs = raidsoftc(dev);
946 RF_Raid_t *raidPtr;
947
948 raidPtr = &rs->sc_r;
949 if (!raidPtr->valid) {
950 db1_printf(("raid is not valid..\n"));
951 return ENODEV;
952 }
953
954 /* XXX */
955 bp->b_resid = 0;
956
957 return raiddoaccess(raidPtr, bp);
958 }
959
960 void
961 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
962 {
963 struct raid_softc *rs;
964 struct dk_softc *dksc;
965
966 rs = raidPtr->softc;
967 dksc = &rs->sc_dksc;
968
969 dk_done(dksc, bp);
970
971 rf_lock_mutex2(raidPtr->mutex);
972 raidPtr->openings++;
973 rf_unlock_mutex2(raidPtr->mutex);
974
975 /* schedule more IO */
976 raid_wakeup(raidPtr);
977 }
978
979 /* ARGSUSED */
980 static int
981 raidread(dev_t dev, struct uio *uio, int flags)
982 {
983 int unit = raidunit(dev);
984 struct raid_softc *rs;
985
986 if ((rs = raidget(unit, false)) == NULL)
987 return ENXIO;
988
989 if ((rs->sc_flags & RAIDF_INITED) == 0)
990 return (ENXIO);
991
992 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
993
994 }
995
996 /* ARGSUSED */
997 static int
998 raidwrite(dev_t dev, struct uio *uio, int flags)
999 {
1000 int unit = raidunit(dev);
1001 struct raid_softc *rs;
1002
1003 if ((rs = raidget(unit, false)) == NULL)
1004 return ENXIO;
1005
1006 if ((rs->sc_flags & RAIDF_INITED) == 0)
1007 return (ENXIO);
1008
1009 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1010
1011 }
1012
/*
 * Tear down a configured unit; the caller must hold the unit lock
 * (raidlock).  Fails with EBUSY while the device is open or while
 * reconstruction, parity rewrite, or copyback is in progress.
 * Returns 0 both on success and when the unit was never configured.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* NOTE(review): clears the shutdown flag before rf_Shutdown —
	 * presumably so a failed shutdown leaves the unit usable;
	 * confirm against callers. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1050
1051 static int
1052 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1053 {
1054 int unit = raidunit(dev);
1055 int error = 0;
1056 int part, pmask;
1057 struct raid_softc *rs;
1058 struct dk_softc *dksc;
1059 RF_Config_t *k_cfg, *u_cfg;
1060 RF_Raid_t *raidPtr;
1061 RF_RaidDisk_t *diskPtr;
1062 RF_AccTotals_t *totals;
1063 RF_DeviceConfig_t *d_cfg, *ucfgp;
1064 u_char *specific_buf;
1065 int retcode = 0;
1066 int column;
1067 /* int raidid; */
1068 struct rf_recon_req *rr;
1069 struct rf_recon_req_internal *rrint;
1070 RF_ComponentLabel_t *clabel;
1071 RF_ComponentLabel_t *ci_label;
1072 RF_SingleComponent_t *sparePtr,*componentPtr;
1073 RF_SingleComponent_t component;
1074 int d;
1075
1076 if ((rs = raidget(unit, false)) == NULL)
1077 return ENXIO;
1078 dksc = &rs->sc_dksc;
1079 raidPtr = &rs->sc_r;
1080
1081 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1082 (int) DISKPART(dev), (int) unit, cmd));
1083
1084 /* Must be initialized for these... */
1085 switch (cmd) {
1086 case RAIDFRAME_REWRITEPARITY:
1087 case RAIDFRAME_GET_INFO:
1088 case RAIDFRAME_RESET_ACCTOTALS:
1089 case RAIDFRAME_GET_ACCTOTALS:
1090 case RAIDFRAME_KEEP_ACCTOTALS:
1091 case RAIDFRAME_GET_SIZE:
1092 case RAIDFRAME_FAIL_DISK:
1093 case RAIDFRAME_COPYBACK:
1094 case RAIDFRAME_CHECK_RECON_STATUS:
1095 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1096 case RAIDFRAME_GET_COMPONENT_LABEL:
1097 case RAIDFRAME_SET_COMPONENT_LABEL:
1098 case RAIDFRAME_ADD_HOT_SPARE:
1099 case RAIDFRAME_REMOVE_HOT_SPARE:
1100 case RAIDFRAME_INIT_LABELS:
1101 case RAIDFRAME_REBUILD_IN_PLACE:
1102 case RAIDFRAME_CHECK_PARITY:
1103 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1104 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1105 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1106 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1107 case RAIDFRAME_SET_AUTOCONFIG:
1108 case RAIDFRAME_SET_ROOT:
1109 case RAIDFRAME_DELETE_COMPONENT:
1110 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1111 case RAIDFRAME_PARITYMAP_STATUS:
1112 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1113 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1114 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1115 #ifdef _LP64
1116 case RAIDFRAME_GET_INFO32:
1117 #endif
1118 if ((rs->sc_flags & RAIDF_INITED) == 0)
1119 return (ENXIO);
1120 }
1121
1122 /*
1123 * Handle compat ioctl calls
1124 *
1125 * * If compat code is not loaded, stub returns ENOSYS and we just
1126 * check the "native" cmd's
1127 * * If compat code is loaded but does not recognize the cmd, it
1128 * returns EPASSTHROUGH, and we just check the "native" cmd's
1129 * * If compat code returns EAGAIN, we need to finish via config
1130 * * Otherwise the cmd has been handled and we just return
1131 */
1132 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1133 MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
1134 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1135 enosys(), retcode);
1136 if (retcode == ENOSYS)
1137 retcode = 0;
1138 else if (retcode == EAGAIN)
1139 goto config;
1140 else if (retcode != EPASSTHROUGH)
1141 return retcode;
1142
1143 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1144 MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
1145 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1146 enosys(), retcode);
1147 if (retcode == ENOSYS)
1148 retcode = 0;
1149 else if (retcode == EAGAIN)
1150 goto config;
1151 else if (retcode != EPASSTHROUGH)
1152 return retcode;
1153
1154 /*
1155 * XXX
1156 * Handling of FAIL_DISK80 command requires us to retain retcode's
1157 * value of EPASSTHROUGH. If you add more compat code later, make
1158 * sure you don't overwrite retcode and break this!
1159 */
1160
1161 switch (cmd) {
1162
1163 /* configure the system */
1164 case RAIDFRAME_CONFIGURE:
1165 #ifdef _LP64
1166 case RAIDFRAME_CONFIGURE32:
1167 #endif
1168
1169 if (raidPtr->valid) {
1170 /* There is a valid RAID set running on this unit! */
1171 printf("raid%d: Device already configured!\n",unit);
1172 return(EINVAL);
1173 }
1174
1175 /* copy-in the configuration information */
1176 /* data points to a pointer to the configuration structure */
1177
1178 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1179 if (k_cfg == NULL) {
1180 return (ENOMEM);
1181 }
1182 #ifdef _LP64
1183 if (cmd == RAIDFRAME_CONFIGURE32 &&
1184 (l->l_proc->p_flag & PK_32) != 0)
1185 MODULE_CALL_HOOK(raidframe_netbsd32_config_hook,
1186 (data, k_cfg), enosys(), retcode);
1187 else
1188 #endif
1189 {
1190 u_cfg = *((RF_Config_t **) data);
1191 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1192 }
1193 if (retcode) {
1194 RF_Free(k_cfg, sizeof(RF_Config_t));
1195 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1196 retcode));
1197 goto no_config;
1198 }
1199 goto config;
1200 config:
1201 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1202
1203 /* allocate a buffer for the layout-specific data, and copy it
1204 * in */
1205 if (k_cfg->layoutSpecificSize) {
1206 if (k_cfg->layoutSpecificSize > 10000) {
1207 /* sanity check */
1208 RF_Free(k_cfg, sizeof(RF_Config_t));
1209 retcode = EINVAL;
1210 goto no_config;
1211 }
1212 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1213 (u_char *));
1214 if (specific_buf == NULL) {
1215 RF_Free(k_cfg, sizeof(RF_Config_t));
1216 retcode = ENOMEM;
1217 goto no_config;
1218 }
1219 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1220 k_cfg->layoutSpecificSize);
1221 if (retcode) {
1222 RF_Free(k_cfg, sizeof(RF_Config_t));
1223 RF_Free(specific_buf,
1224 k_cfg->layoutSpecificSize);
1225 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1226 retcode));
1227 goto no_config;
1228 }
1229 } else
1230 specific_buf = NULL;
1231 k_cfg->layoutSpecific = specific_buf;
1232
1233 /* should do some kind of sanity check on the configuration.
1234 * Store the sum of all the bytes in the last byte? */
1235
1236 /* configure the system */
1237
1238 /*
1239 * Clear the entire RAID descriptor, just to make sure
1240 * there is no stale data left in the case of a
1241 * reconfiguration
1242 */
1243 memset(raidPtr, 0, sizeof(*raidPtr));
1244 raidPtr->softc = rs;
1245 raidPtr->raidid = unit;
1246
1247 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1248
1249 if (retcode == 0) {
1250
1251 /* allow this many simultaneous IO's to
1252 this RAID device */
1253 raidPtr->openings = RAIDOUTSTANDING;
1254
1255 raidinit(rs);
1256 raid_wakeup(raidPtr);
1257 rf_markalldirty(raidPtr);
1258 }
1259 /* free the buffers. No return code here. */
1260 if (k_cfg->layoutSpecificSize) {
1261 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1262 }
1263 RF_Free(k_cfg, sizeof(RF_Config_t));
1264
1265 no_config:
1266 /*
1267 * If configuration failed, set sc_flags so that we
1268 * will detach the device when we close it.
1269 */
1270 if (retcode != 0)
1271 rs->sc_flags |= RAIDF_SHUTDOWN;
1272 return (retcode);
1273
1274 /* shutdown the system */
1275 case RAIDFRAME_SHUTDOWN:
1276
1277 part = DISKPART(dev);
1278 pmask = (1 << part);
1279
1280 if ((error = raidlock(rs)) != 0)
1281 return (error);
1282
1283 if (DK_BUSY(dksc, pmask) ||
1284 raidPtr->recon_in_progress != 0 ||
1285 raidPtr->parity_rewrite_in_progress != 0 ||
1286 raidPtr->copyback_in_progress != 0)
1287 retcode = EBUSY;
1288 else {
1289 /* detach and free on close */
1290 rs->sc_flags |= RAIDF_SHUTDOWN;
1291 retcode = 0;
1292 }
1293
1294 raidunlock(rs);
1295
1296 return (retcode);
1297 case RAIDFRAME_GET_COMPONENT_LABEL:
1298 return rf_get_component_label(raidPtr, data);
1299
1300 #if 0
1301 case RAIDFRAME_SET_COMPONENT_LABEL:
1302 clabel = (RF_ComponentLabel_t *) data;
1303
1304 /* XXX check the label for valid stuff... */
1305 /* Note that some things *should not* get modified --
1306 the user should be re-initing the labels instead of
1307 trying to patch things.
1308 */
1309
1310 raidid = raidPtr->raidid;
1311 #ifdef DEBUG
1312 printf("raid%d: Got component label:\n", raidid);
1313 printf("raid%d: Version: %d\n", raidid, clabel->version);
1314 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1315 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1316 printf("raid%d: Column: %d\n", raidid, clabel->column);
1317 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1318 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1319 printf("raid%d: Status: %d\n", raidid, clabel->status);
1320 #endif /* DEBUG */
1321 clabel->row = 0;
1322 column = clabel->column;
1323
1324 if ((column < 0) || (column >= raidPtr->numCol)) {
1325 return(EINVAL);
1326 }
1327
1328 /* XXX this isn't allowed to do anything for now :-) */
1329
1330 /* XXX and before it is, we need to fill in the rest
1331 of the fields!?!?!?! */
1332 memcpy(raidget_component_label(raidPtr, column),
1333 clabel, sizeof(*clabel));
1334 raidflush_component_label(raidPtr, column);
1335 return (0);
1336 #endif /* 0 */
1337
1338 case RAIDFRAME_INIT_LABELS:
1339 clabel = (RF_ComponentLabel_t *) data;
1340 /*
1341 we only want the serial number from
1342 the above. We get all the rest of the information
1343 from the config that was used to create this RAID
1344 set.
1345 */
1346
1347 raidPtr->serial_number = clabel->serial_number;
1348
1349 for(column=0;column<raidPtr->numCol;column++) {
1350 diskPtr = &raidPtr->Disks[column];
1351 if (!RF_DEAD_DISK(diskPtr->status)) {
1352 ci_label = raidget_component_label(raidPtr,
1353 column);
1354 /* Zeroing this is important. */
1355 memset(ci_label, 0, sizeof(*ci_label));
1356 raid_init_component_label(raidPtr, ci_label);
1357 ci_label->serial_number =
1358 raidPtr->serial_number;
1359 ci_label->row = 0; /* we dont' pretend to support more */
1360 rf_component_label_set_partitionsize(ci_label,
1361 diskPtr->partitionSize);
1362 ci_label->column = column;
1363 raidflush_component_label(raidPtr, column);
1364 }
1365 /* XXXjld what about the spares? */
1366 }
1367
1368 return (retcode);
1369 case RAIDFRAME_SET_AUTOCONFIG:
1370 d = rf_set_autoconfig(raidPtr, *(int *) data);
1371 printf("raid%d: New autoconfig value is: %d\n",
1372 raidPtr->raidid, d);
1373 *(int *) data = d;
1374 return (retcode);
1375
1376 case RAIDFRAME_SET_ROOT:
1377 d = rf_set_rootpartition(raidPtr, *(int *) data);
1378 printf("raid%d: New rootpartition value is: %d\n",
1379 raidPtr->raidid, d);
1380 *(int *) data = d;
1381 return (retcode);
1382
1383 /* initialize all parity */
1384 case RAIDFRAME_REWRITEPARITY:
1385
1386 if (raidPtr->Layout.map->faultsTolerated == 0) {
1387 /* Parity for RAID 0 is trivially correct */
1388 raidPtr->parity_good = RF_RAID_CLEAN;
1389 return(0);
1390 }
1391
1392 if (raidPtr->parity_rewrite_in_progress == 1) {
1393 /* Re-write is already in progress! */
1394 return(EINVAL);
1395 }
1396
1397 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1398 rf_RewriteParityThread,
1399 raidPtr,"raid_parity");
1400 return (retcode);
1401
1402
1403 case RAIDFRAME_ADD_HOT_SPARE:
1404 sparePtr = (RF_SingleComponent_t *) data;
1405 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1406 retcode = rf_add_hot_spare(raidPtr, &component);
1407 return(retcode);
1408
1409 case RAIDFRAME_REMOVE_HOT_SPARE:
1410 return(retcode);
1411
1412 case RAIDFRAME_DELETE_COMPONENT:
1413 componentPtr = (RF_SingleComponent_t *)data;
1414 memcpy( &component, componentPtr,
1415 sizeof(RF_SingleComponent_t));
1416 retcode = rf_delete_component(raidPtr, &component);
1417 return(retcode);
1418
1419 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1420 componentPtr = (RF_SingleComponent_t *)data;
1421 memcpy( &component, componentPtr,
1422 sizeof(RF_SingleComponent_t));
1423 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1424 return(retcode);
1425
1426 case RAIDFRAME_REBUILD_IN_PLACE:
1427
1428 if (raidPtr->Layout.map->faultsTolerated == 0) {
1429 /* Can't do this on a RAID 0!! */
1430 return(EINVAL);
1431 }
1432
1433 if (raidPtr->recon_in_progress == 1) {
1434 /* a reconstruct is already in progress! */
1435 return(EINVAL);
1436 }
1437
1438 componentPtr = (RF_SingleComponent_t *) data;
1439 memcpy( &component, componentPtr,
1440 sizeof(RF_SingleComponent_t));
1441 component.row = 0; /* we don't support any more */
1442 column = component.column;
1443
1444 if ((column < 0) || (column >= raidPtr->numCol)) {
1445 return(EINVAL);
1446 }
1447
1448 rf_lock_mutex2(raidPtr->mutex);
1449 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1450 (raidPtr->numFailures > 0)) {
1451 /* XXX 0 above shouldn't be constant!!! */
1452 /* some component other than this has failed.
1453 Let's not make things worse than they already
1454 are... */
1455 printf("raid%d: Unable to reconstruct to disk at:\n",
1456 raidPtr->raidid);
1457 printf("raid%d: Col: %d Too many failures.\n",
1458 raidPtr->raidid, column);
1459 rf_unlock_mutex2(raidPtr->mutex);
1460 return (EINVAL);
1461 }
1462 if (raidPtr->Disks[column].status ==
1463 rf_ds_reconstructing) {
1464 printf("raid%d: Unable to reconstruct to disk at:\n",
1465 raidPtr->raidid);
1466 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1467
1468 rf_unlock_mutex2(raidPtr->mutex);
1469 return (EINVAL);
1470 }
1471 if (raidPtr->Disks[column].status == rf_ds_spared) {
1472 rf_unlock_mutex2(raidPtr->mutex);
1473 return (EINVAL);
1474 }
1475 rf_unlock_mutex2(raidPtr->mutex);
1476
1477 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1478 if (rrint == NULL)
1479 return(ENOMEM);
1480
1481 rrint->col = column;
1482 rrint->raidPtr = raidPtr;
1483
1484 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1485 rf_ReconstructInPlaceThread,
1486 rrint, "raid_reconip");
1487 return(retcode);
1488
1489 case RAIDFRAME_GET_INFO:
1490 #ifdef _LP64
1491 case RAIDFRAME_GET_INFO32:
1492 #endif /* LP64 */
1493 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1494 (RF_DeviceConfig_t *));
1495 if (d_cfg == NULL)
1496 return (ENOMEM);
1497 retcode = rf_get_info(raidPtr, d_cfg);
1498 if (retcode == 0) {
1499 #ifdef _LP64
1500 if (raidframe_netbsd32_config_hook.hooked &&
1501 cmd == RAIDFRAME_GET_INFO32)
1502 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1503 else
1504 #endif /* _LP64 */
1505 ucfgp = *(RF_DeviceConfig_t **)data;
1506 retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1507 }
1508 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1509
1510 return (retcode);
1511
1512 case RAIDFRAME_CHECK_PARITY:
1513 *(int *) data = raidPtr->parity_good;
1514 return (0);
1515
1516 case RAIDFRAME_PARITYMAP_STATUS:
1517 if (rf_paritymap_ineligible(raidPtr))
1518 return EINVAL;
1519 rf_paritymap_status(raidPtr->parity_map,
1520 (struct rf_pmstat *)data);
1521 return 0;
1522
1523 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1524 if (rf_paritymap_ineligible(raidPtr))
1525 return EINVAL;
1526 if (raidPtr->parity_map == NULL)
1527 return ENOENT; /* ??? */
1528 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1529 (struct rf_pmparams *)data, 1))
1530 return EINVAL;
1531 return 0;
1532
1533 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1534 if (rf_paritymap_ineligible(raidPtr))
1535 return EINVAL;
1536 *(int *) data = rf_paritymap_get_disable(raidPtr);
1537 return 0;
1538
1539 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1540 if (rf_paritymap_ineligible(raidPtr))
1541 return EINVAL;
1542 rf_paritymap_set_disable(raidPtr, *(int *)data);
1543 /* XXX should errors be passed up? */
1544 return 0;
1545
1546 case RAIDFRAME_RESET_ACCTOTALS:
1547 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1548 return (0);
1549
1550 case RAIDFRAME_GET_ACCTOTALS:
1551 totals = (RF_AccTotals_t *) data;
1552 *totals = raidPtr->acc_totals;
1553 return (0);
1554
1555 case RAIDFRAME_KEEP_ACCTOTALS:
1556 raidPtr->keep_acc_totals = *(int *)data;
1557 return (0);
1558
1559 case RAIDFRAME_GET_SIZE:
1560 *(int *) data = raidPtr->totalSectors;
1561 return (0);
1562
1563 /* fail a disk & optionally start reconstruction */
1564 case RAIDFRAME_FAIL_DISK80:
1565 /* Check if we called compat code for this cmd */
1566 if (retcode != EPASSTHROUGH)
1567 return EINVAL;
1568 /* FALLTHRU */
1569 case RAIDFRAME_FAIL_DISK:
1570 if (raidPtr->Layout.map->faultsTolerated == 0) {
1571 /* Can't do this on a RAID 0!! */
1572 return(EINVAL);
1573 }
1574
1575 rr = (struct rf_recon_req *) data;
1576 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1577 return (EINVAL);
1578
1579 rf_lock_mutex2(raidPtr->mutex);
1580 if (raidPtr->status == rf_rs_reconstructing) {
1581 /* you can't fail a disk while we're reconstructing! */
1582 /* XXX wrong for RAID6 */
1583 rf_unlock_mutex2(raidPtr->mutex);
1584 return (EINVAL);
1585 }
1586 if ((raidPtr->Disks[rr->col].status ==
1587 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1588 /* some other component has failed. Let's not make
1589 things worse. XXX wrong for RAID6 */
1590 rf_unlock_mutex2(raidPtr->mutex);
1591 return (EINVAL);
1592 }
1593 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1594 /* Can't fail a spared disk! */
1595 rf_unlock_mutex2(raidPtr->mutex);
1596 return (EINVAL);
1597 }
1598 rf_unlock_mutex2(raidPtr->mutex);
1599
1600 /* make a copy of the recon request so that we don't rely on
1601 * the user's buffer */
1602 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1603 if (rrint == NULL)
1604 return(ENOMEM);
1605 rrint->col = rr->col;
1606 rrint->flags = rr->flags;
1607 rrint->raidPtr = raidPtr;
1608
1609 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1610 rf_ReconThread,
1611 rrint, "raid_recon");
1612 return (0);
1613
1614 /* invoke a copyback operation after recon on whatever disk
1615 * needs it, if any */
1616 case RAIDFRAME_COPYBACK:
1617
1618 if (raidPtr->Layout.map->faultsTolerated == 0) {
1619 /* This makes no sense on a RAID 0!! */
1620 return(EINVAL);
1621 }
1622
1623 if (raidPtr->copyback_in_progress == 1) {
1624 /* Copyback is already in progress! */
1625 return(EINVAL);
1626 }
1627
1628 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1629 rf_CopybackThread,
1630 raidPtr,"raid_copyback");
1631 return (retcode);
1632
1633 /* return the percentage completion of reconstruction */
1634 case RAIDFRAME_CHECK_RECON_STATUS:
1635 if (raidPtr->Layout.map->faultsTolerated == 0) {
1636 /* This makes no sense on a RAID 0, so tell the
1637 user it's done. */
1638 *(int *) data = 100;
1639 return(0);
1640 }
1641 if (raidPtr->status != rf_rs_reconstructing)
1642 *(int *) data = 100;
1643 else {
1644 if (raidPtr->reconControl->numRUsTotal > 0) {
1645 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1646 } else {
1647 *(int *) data = 0;
1648 }
1649 }
1650 return (0);
1651 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1652 rf_check_recon_status_ext(raidPtr, data);
1653 return (0);
1654
1655 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1656 if (raidPtr->Layout.map->faultsTolerated == 0) {
1657 /* This makes no sense on a RAID 0, so tell the
1658 user it's done. */
1659 *(int *) data = 100;
1660 return(0);
1661 }
1662 if (raidPtr->parity_rewrite_in_progress == 1) {
1663 *(int *) data = 100 *
1664 raidPtr->parity_rewrite_stripes_done /
1665 raidPtr->Layout.numStripe;
1666 } else {
1667 *(int *) data = 100;
1668 }
1669 return (0);
1670
1671 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1672 rf_check_parityrewrite_status_ext(raidPtr, data);
1673 return (0);
1674
1675 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1676 if (raidPtr->Layout.map->faultsTolerated == 0) {
1677 /* This makes no sense on a RAID 0 */
1678 *(int *) data = 100;
1679 return(0);
1680 }
1681 if (raidPtr->copyback_in_progress == 1) {
1682 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1683 raidPtr->Layout.numStripe;
1684 } else {
1685 *(int *) data = 100;
1686 }
1687 return (0);
1688
1689 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1690 rf_check_copyback_status_ext(raidPtr, data);
1691 return 0;
1692
1693 case RAIDFRAME_SET_LAST_UNIT:
1694 for (column = 0; column < raidPtr->numCol; column++)
1695 if (raidPtr->Disks[column].status != rf_ds_optimal)
1696 return EBUSY;
1697
1698 for (column = 0; column < raidPtr->numCol; column++) {
1699 clabel = raidget_component_label(raidPtr, column);
1700 clabel->last_unit = *(int *)data;
1701 raidflush_component_label(raidPtr, column);
1702 }
1703 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1704 return 0;
1705
1706 /* the sparetable daemon calls this to wait for the kernel to
1707 * need a spare table. this ioctl does not return until a
1708 * spare table is needed. XXX -- calling mpsleep here in the
1709 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1710 * -- I should either compute the spare table in the kernel,
1711 * or have a different -- XXX XXX -- interface (a different
1712 * character device) for delivering the table -- XXX */
1713 #if 0
1714 case RAIDFRAME_SPARET_WAIT:
1715 rf_lock_mutex2(rf_sparet_wait_mutex);
1716 while (!rf_sparet_wait_queue)
1717 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1718 waitreq = rf_sparet_wait_queue;
1719 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1720 rf_unlock_mutex2(rf_sparet_wait_mutex);
1721
1722 /* structure assignment */
1723 *((RF_SparetWait_t *) data) = *waitreq;
1724
1725 RF_Free(waitreq, sizeof(*waitreq));
1726 return (0);
1727
1728 /* wakes up a process waiting on SPARET_WAIT and puts an error
1729 * code in it that will cause the dameon to exit */
1730 case RAIDFRAME_ABORT_SPARET_WAIT:
1731 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1732 waitreq->fcol = -1;
1733 rf_lock_mutex2(rf_sparet_wait_mutex);
1734 waitreq->next = rf_sparet_wait_queue;
1735 rf_sparet_wait_queue = waitreq;
1736 rf_broadcast_conf2(rf_sparet_wait_cv);
1737 rf_unlock_mutex2(rf_sparet_wait_mutex);
1738 return (0);
1739
1740 /* used by the spare table daemon to deliver a spare table
1741 * into the kernel */
1742 case RAIDFRAME_SEND_SPARET:
1743
1744 /* install the spare table */
1745 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1746
1747 /* respond to the requestor. the return status of the spare
1748 * table installation is passed in the "fcol" field */
1749 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1750 waitreq->fcol = retcode;
1751 rf_lock_mutex2(rf_sparet_wait_mutex);
1752 waitreq->next = rf_sparet_resp_queue;
1753 rf_sparet_resp_queue = waitreq;
1754 rf_broadcast_cond2(rf_sparet_resp_cv);
1755 rf_unlock_mutex2(rf_sparet_wait_mutex);
1756
1757 return (retcode);
1758 #endif
1759
1760 default:
1761 break; /* fall through to the os-specific code below */
1762
1763 }
1764
1765 if (!raidPtr->valid)
1766 return (EINVAL);
1767
1768 /*
1769 * Add support for "regular" device ioctls here.
1770 */
1771
1772 switch (cmd) {
1773 case DIOCGCACHE:
1774 retcode = rf_get_component_caches(raidPtr, (int *)data);
1775 break;
1776
1777 case DIOCCACHESYNC:
1778 retcode = rf_sync_component_caches(raidPtr);
1779 break;
1780
1781 default:
1782 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1783 break;
1784 }
1785
1786 return (retcode);
1787
1788 }
1789
1790
1791 /* raidinit -- complete the rest of the initialization for the
1792 RAIDframe device. */
1793
1794
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.

   Attaches a pseudo-device instance for the configured RAID set, wires
   up the dk(9)/disk(9) layers, allocates the buffer queue, marks the
   unit RAIDF_INITED, and kicks off wedge discovery.  The ordering of
   the calls below matters: disk_init()/disk_attach() must precede any
   disklabel use, and RAIDF_INITED is only set once everything else is
   ready.  On config_attach_pseudo() failure the unit is left
   unattached (no error is propagated to the caller). */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* First-come-first-served; RAIDframe does its own sorting. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}
1850
1851 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1852 /* wake up the daemon & tell it to get us a spare table
1853 * XXX
1854 * the entries in the queues should be tagged with the raidPtr
1855 * so that in the extremely rare case that two recons happen at once,
1856 * we know for which device were requesting a spare table
1857 * XXX
1858 *
1859 * XXX This code is not currently used. GO
1860 */
/*
 * Hand a spare-table request to the user-space sparetable daemon and
 * block until the daemon delivers a response.
 *
 * The request is pushed onto rf_sparet_wait_queue (consumed by the
 * daemon via the SPARET_WAIT ioctl) and the daemon's answer arrives on
 * rf_sparet_resp_queue; both queues share rf_sparet_wait_mutex.  The
 * returned value is the "fcol" status field of the response, which is
 * freed here (note it is a different allocation than the request we
 * were passed).
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* 'req' is reused to walk the response queue from here on. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1884 #endif
1885
1886 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1887 * bp & passes it down.
1888 * any calls originating in the kernel must use non-blocking I/O
1889 * do some extra sanity checking to return "appropriate" error values for
1890 * certain conditions (to make some standard utilities work)
1891 *
1892 * Formerly known as: rf_DoAccessKernel
1893 */
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Component labels must be updated outside the raid mutex;
		 * drop it around the update and reacquire to adjust the
		 * counter. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* Kick the dk(9) layer to (re)start pushing queued buffers. */
	dk_start(dksc, NULL);
}
1920
/*
 * Translate one struct buf into a RAIDframe access and submit it via
 * rf_DoAccess() (always asynchronous, non-blocking DAG I/O).
 *
 * Returns 0 when the access was handed off, EAGAIN when no openings
 * are currently available (caller retries later), or ENOSPC when the
 * request falls outside the array or is not sector-aligned.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* Respect the concurrency limit set at configure time. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* Size of the request in sectors, plus one partial sector if the
	 * byte count is not an exact multiple of the sector size. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): "1 ||" forces this debug branch on; db1_printf is
	 * presumably compiled out unless debugging is enabled -- confirm. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Reject out-of-range requests; the "sum <" checks also catch
	 * arithmetic wraparound of the unsigned sector counts. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening for the duration of the access. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1993
1994 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1995
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * Dispatch one RAIDframe disk-queue request to the underlying
 * component device.  NOP requests are completed immediately through
 * KernelWakeupFunc(); reads and writes are packaged into the request's
 * struct buf and sent down with bdev_strategy().  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): double parens here are harmless C but look
		 * like this was meant to be db1_printf(). */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp to target the component's device/vnode at the
		 * requested sector range; completion goes through
		 * KernelWakeupFunc with 'req' as the callback argument. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2069 /* this is the callback function associated with a I/O invoked from
2070 kernel code.
2071 */
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone time for a component I/O issued by
   rf_DispatchKernelIO().  Records the error (failing the component if
   that would not break the set entirely), then moves the request onto
   the raid's iodone queue and signals the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP /
	 * the NOP path of rf_DispatchKernelIO. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2138
2139
2140 /*
2141 * initialize a buf structure for doing an I/O in the kernel.
2142 */
/*
 * initialize a buf structure for doing an I/O in the kernel.
 *
 * Fills in 'bp' for a transfer of 'numSect' sectors starting at
 * 'startSect' on device 'dev', reading into / writing from 'bf', with
 * 'cbFunc'/'cbArg' installed as the b_iodone completion hook.  Panics
 * on a zero-length transfer.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Convert the sector number to bytes, then to DEV_BSIZE blocks. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2167
2168 /*
2169 * Wait interruptibly for an exclusive lock.
2170 *
2171 * XXX
2172 * Several drivers do this; it should be abstracted and made MP-safe.
2173 * (Hmm... where have we seen this warning before :-> GO )
2174 */
2175 static int
2176 raidlock(struct raid_softc *rs)
2177 {
2178 int error;
2179
2180 error = 0;
2181 mutex_enter(&rs->sc_mutex);
2182 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2183 rs->sc_flags |= RAIDF_WANTED;
2184 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2185 if (error != 0)
2186 goto done;
2187 }
2188 rs->sc_flags |= RAIDF_LOCKED;
2189 done:
2190 mutex_exit(&rs->sc_mutex);
2191 return (error);
2192 }
2193 /*
2194 * Unlock and wake up any waiters.
2195 */
2196 static void
2197 raidunlock(struct raid_softc *rs)
2198 {
2199
2200 mutex_enter(&rs->sc_mutex);
2201 rs->sc_flags &= ~RAIDF_LOCKED;
2202 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2203 rs->sc_flags &= ~RAIDF_WANTED;
2204 cv_broadcast(&rs->sc_cv);
2205 }
2206 mutex_exit(&rs->sc_mutex);
2207 }
2208
2209
/*
 * On-disk layout of per-component RAIDframe metadata: the component
 * label lives RF_COMPONENT_INFO_OFFSET bytes into the component, in
 * an area of at least RF_COMPONENT_INFO_SIZE bytes; the parity map
 * area follows it (see rf_parity_map_offset()/rf_parity_map_size()).
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
#define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2213
/*
 * Byte offset of the component-label area within a component.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2220
2221 static daddr_t
2222 rf_component_info_size(unsigned secsize)
2223 {
2224 daddr_t info_size;
2225
2226 KASSERT(secsize);
2227 if (secsize > RF_COMPONENT_INFO_SIZE)
2228 info_size = secsize;
2229 else
2230 info_size = RF_COMPONENT_INFO_SIZE;
2231
2232 return info_size;
2233 }
2234
2235 static daddr_t
2236 rf_parity_map_offset(RF_Raid_t *raidPtr)
2237 {
2238 daddr_t map_offset;
2239
2240 KASSERT(raidPtr->bytesPerSector);
2241 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2242 map_offset = raidPtr->bytesPerSector;
2243 else
2244 map_offset = RF_COMPONENT_INFO_SIZE;
2245 map_offset += rf_component_info_offset();
2246
2247 return map_offset;
2248 }
2249
2250 static daddr_t
2251 rf_parity_map_size(RF_Raid_t *raidPtr)
2252 {
2253 daddr_t map_size;
2254
2255 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2256 map_size = raidPtr->bytesPerSector;
2257 else
2258 map_size = RF_PARITY_MAP_SIZE;
2259
2260 return map_size;
2261 }
2262
2263 int
2264 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2265 {
2266 RF_ComponentLabel_t *clabel;
2267
2268 clabel = raidget_component_label(raidPtr, col);
2269 clabel->clean = RF_RAID_CLEAN;
2270 raidflush_component_label(raidPtr, col);
2271 return(0);
2272 }
2273
2274
2275 int
2276 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2277 {
2278 RF_ComponentLabel_t *clabel;
2279
2280 clabel = raidget_component_label(raidPtr, col);
2281 clabel->clean = RF_RAID_DIRTY;
2282 raidflush_component_label(raidPtr, col);
2283 return(0);
2284 }
2285
/*
 * Read the component label for column `col' from disk into the
 * in-core copy kept in raid_cinfo[col].ci_label.  Returns the error
 * from the underlying read, if any.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2295
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers typically modify this copy and then push it to disk with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2301
/*
 * Write the in-core component label for column `col' back to disk,
 * stamping it with the set's current modification counter first.
 * Returns the error from the underlying write, if any.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* Keep the parity map's mod counter in sync with the label's. */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2316
2317
/*
 * Read a component label from the component-info area of the given
 * device.  `secsize' is the component's sector size, used to size
 * the on-disk label area.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2327
/*
 * Read `msize' bytes of metadata into `data' from the `dsize'-byte
 * area at byte offset `offset' on the component.  The read goes
 * synchronously through the block device.  Returns 0 on success or
 * an errno value.
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* Issue the read and wait for completion. */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		/* Only the first msize bytes of the area are meaningful. */
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2365
2366
/*
 * Write a component label to the component-info area of the given
 * device, synchronously (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2376
/*
 * Write `msize' bytes of metadata from `data' to the `dsize'-byte
 * area at byte offset `offset' on the component; the remainder of the
 * area is zero-filled.  If `asyncp' is nonzero the write is issued
 * B_ASYNC and we return 0 immediately without waiting for completion.
 * NOTE(review): on the async path the buffer is not brelse()'d here;
 * presumably it is reclaimed on I/O completion -- confirm.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* Zero-fill the area, then lay the metadata payload on top. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2411
/*
 * Write the on-disk parity map `map' to the parity-map area of every
 * live component of the array.
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2429
/*
 * Read the parity map from every live component into `map': the
 * first live copy seeds the result, and subsequent copies are folded
 * in via rf_paritymap_merge().
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* NOTE(review): read errors are not checked here. */
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* First live copy becomes the baseline. */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2454
/*
 * Bump the modification counter and mark the component label of every
 * usable component (and every in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			   NOTE(review): scol keeps its prior value (-1
			   initially) if no mapping is found -- confirm
			   that cannot happen here. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2514
2515
/*
 * Push fresh component labels out to every optimal component and
 * every in-use spare.  The modification counter is bumped first so
 * all labels written in this pass agree.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * also marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced.
			   NOTE(review): scol stays -1 if no match is
			   found -- confirm that cannot happen. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2593
/*
 * Release the vnode of a component.  Auto-configured components were
 * opened with VOP_OPEN directly (see rf_find_raid_components()), so
 * they are closed with VOP_CLOSE under the vnode lock and vput().
 * Other components are closed via vn_close() with the current lwp's
 * credentials (presumably matching how they were opened -- the open
 * path is not in this file region).
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}
2609
2610
2611 void
2612 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2613 {
2614 int r,c;
2615 struct vnode *vp;
2616 int acd;
2617
2618
2619 /* We take this opportunity to close the vnodes like we should.. */
2620
2621 for (c = 0; c < raidPtr->numCol; c++) {
2622 vp = raidPtr->raid_cinfo[c].ci_vp;
2623 acd = raidPtr->Disks[c].auto_configured;
2624 rf_close_component(raidPtr, vp, acd);
2625 raidPtr->raid_cinfo[c].ci_vp = NULL;
2626 raidPtr->Disks[c].auto_configured = 0;
2627 }
2628
2629 for (r = 0; r < raidPtr->numSpare; r++) {
2630 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2631 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2632 rf_close_component(raidPtr, vp, acd);
2633 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2634 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2635 }
2636 }
2637
2638
/*
 * Kernel thread body: fail the component named in `req' and, if
 * RF_FDFLAGS_RECON is set, initiate reconstruction.  Frees `req'
 * and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* Mark the disk as failed, optionally kicking off recon. */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2660
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * in-core parity state is marked clean; either way, anyone blocked in
 * shutdown waiting on parity_rewrite_cv is woken.  Exits the thread
 * when done.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		       raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2693
2694
/*
 * Kernel thread body: run rf_CopybackReconstructedData() (copy data
 * from a spare back onto a replaced component) and exit the thread.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2709
2710
/*
 * Kernel thread body: reconstruct column `req->col' in place (onto
 * the component itself rather than a spare), free `req', and exit
 * the thread.  The return value of rf_ReconstructInPlace() is not
 * checked here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2728
/*
 * Examine one candidate device: read its component label and, if the
 * label looks reasonable and fits within `size' sectors, prepend a
 * new RF_AutoConfig_t entry for it to `ac_list'.  On rejection the
 * vnode is closed and released.  On an out-of-memory condition the
 * whole accumulated list is freed and NULL is returned.  Returns the
 * (possibly updated) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: free everything collected so far.
		   NOTE(review): vp is not closed on this path. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* Prepend to the list. */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2786
/*
 * Scan every disk-class device in the system for RAIDframe
 * components, returning a list of RF_AutoConfig_t entries for all
 * candidates found.  Each disk is probed via its raw partition (or
 * the wedge device itself); components may live in a RAID-type
 * wedge, a FS_RAID disklabel partition, or on the raw disk.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: only RAID-type wedges
				   are candidates. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes over the vnode. */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2990
2991
2992 int
2993 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2994 {
2995
2996 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2997 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2998 ((clabel->clean == RF_RAID_CLEAN) ||
2999 (clabel->clean == RF_RAID_DIRTY)) &&
3000 clabel->row >=0 &&
3001 clabel->column >= 0 &&
3002 clabel->num_rows > 0 &&
3003 clabel->num_columns > 0 &&
3004 clabel->row < clabel->num_rows &&
3005 clabel->column < clabel->num_columns &&
3006 clabel->blockSize > 0 &&
3007 /*
3008 * numBlocksHi may contain garbage, but it is ok since
3009 * the type is unsigned. If it is really garbage,
3010 * rf_fix_old_label_size() will fix it.
3011 */
3012 rf_component_label_numblocks(clabel) > 0) {
3013 /*
3014 * label looks reasonable enough...
3015 * let's make sure it has no old garbage.
3016 */
3017 if (numsecs)
3018 rf_fix_old_label_size(clabel, numsecs);
3019 return(1);
3020 }
3021 return(0);
3022 }
3023
3024
3025 /*
3026 * For reasons yet unknown, some old component labels have garbage in
3027 * the newer numBlocksHi region, and this causes lossage. Since those
3028 * disks will also have numsecs set to less than 32 bits of sectors,
3029 * we can determine when this corruption has occurred, and fix it.
3030 *
3031 * The exact same problem, with the same unknown reason, happens to
3032 * the partitionSizeHi member as well.
3033 */
3034 static void
3035 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3036 {
3037
3038 if (numsecs < ((uint64_t)1 << 32)) {
3039 if (clabel->numBlocksHi) {
3040 printf("WARNING: total sectors < 32 bits, yet "
3041 "numBlocksHi set\n"
3042 "WARNING: resetting numBlocksHi to zero.\n");
3043 clabel->numBlocksHi = 0;
3044 }
3045
3046 if (clabel->partitionSizeHi) {
3047 printf("WARNING: total sectors < 32 bits, yet "
3048 "partitionSizeHi set\n"
3049 "WARNING: resetting partitionSizeHi to zero.\n");
3050 clabel->partitionSizeHi = 0;
3051 }
3052 }
3053 }
3054
3055
#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console
 * (debug kernels only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Printable names for the root_partition codes 0..2. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3089
/*
 * Partition the list of discovered components into configuration
 * sets: components whose labels agree (per rf_does_it_fit()) end up
 * on the same set's `ac' list.  The input list's links are consumed
 * (each component is re-linked onto its set).  Returns the list of
 * sets, or panics on allocation failure.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches...
					   prepend to this set. */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
3155
3156 static int
3157 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3158 {
3159 RF_ComponentLabel_t *clabel1, *clabel2;
3160
3161 /* If this one matches the *first* one in the set, that's good
3162 enough, since the other members of the set would have been
3163 through here too... */
3164 /* note that we are not checking partitionSize here..
3165
3166 Note that we are also not checking the mod_counters here.
3167 If everything else matches except the mod_counter, that's
3168 good enough for this test. We will deal with the mod_counters
3169 a little later in the autoconfiguration process.
3170
3171 (clabel1->mod_counter == clabel2->mod_counter) &&
3172
3173 The reason we don't check for this is that failed disks
3174 will have lower modification counts. If those disks are
3175 not added to the set they used to belong to, then they will
3176 form their own set, which may result in 2 different sets,
3177 for example, competing to be configured at raid0, and
3178 perhaps competing to be the root filesystem set. If the
3179 wrong ones get configured, or both attempt to become /,
3180 weird behaviour and or serious lossage will occur. Thus we
3181 need to bring them into the fold here, and kick them out at
3182 a later point.
3183
3184 */
3185
3186 clabel1 = cset->ac->clabel;
3187 clabel2 = ac->clabel;
3188 if ((clabel1->version == clabel2->version) &&
3189 (clabel1->serial_number == clabel2->serial_number) &&
3190 (clabel1->num_rows == clabel2->num_rows) &&
3191 (clabel1->num_columns == clabel2->num_columns) &&
3192 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3193 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3194 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3195 (clabel1->parityConfig == clabel2->parityConfig) &&
3196 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3197 (clabel1->blockSize == clabel2->blockSize) &&
3198 rf_component_label_numblocks(clabel1) ==
3199 rf_component_label_numblocks(clabel2) &&
3200 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3201 (clabel1->root_partition == clabel2->root_partition) &&
3202 (clabel1->last_unit == clabel2->last_unit) &&
3203 (clabel1->config_order == clabel2->config_order)) {
3204 /* if it get's here, it almost *has* to be a match */
3205 } else {
3206 /* it's not consistent with somebody in the set..
3207 punt */
3208 return(0);
3209 }
3210 /* all was fine.. it must fit... */
3211 return(1);
3212 }
3213
/*
 * Decide whether a configuration set has enough live components (at
 * the newest mod_counter seen in the set) to be configured.  RAID 1
 * is special-cased: one member of each even/odd mirror pair may be
 * missing, but not both.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a component for column c at the newest
		   mod_counter; stale components don't count. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3316
3317 void
3318 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3319 RF_Raid_t *raidPtr)
3320 {
3321 RF_ComponentLabel_t *clabel;
3322 int i;
3323
3324 clabel = ac->clabel;
3325
3326 /* 1. Fill in the common stuff */
3327 config->numCol = clabel->num_columns;
3328 config->numSpare = 0; /* XXX should this be set here? */
3329 config->sectPerSU = clabel->sectPerSU;
3330 config->SUsPerPU = clabel->SUsPerPU;
3331 config->SUsPerRU = clabel->SUsPerRU;
3332 config->parityConfig = clabel->parityConfig;
3333 /* XXX... */
3334 strcpy(config->diskQueueType,"fifo");
3335 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3336 config->layoutSpecificSize = 0; /* XXX ?? */
3337
3338 while(ac!=NULL) {
3339 /* row/col values will be in range due to the checks
3340 in reasonable_label() */
3341 strcpy(config->devnames[0][ac->clabel->column],
3342 ac->devname);
3343 ac = ac->next;
3344 }
3345
3346 for(i=0;i<RF_MAXDBGV;i++) {
3347 config->debugVars[i][0] = 0;
3348 }
3349 }
3350
3351 int
3352 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3353 {
3354 RF_ComponentLabel_t *clabel;
3355 int column;
3356 int sparecol;
3357
3358 raidPtr->autoconfigure = new_value;
3359
3360 for(column=0; column<raidPtr->numCol; column++) {
3361 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3362 clabel = raidget_component_label(raidPtr, column);
3363 clabel->autoconfigure = new_value;
3364 raidflush_component_label(raidPtr, column);
3365 }
3366 }
3367 for(column = 0; column < raidPtr->numSpare ; column++) {
3368 sparecol = raidPtr->numCol + column;
3369 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3370 clabel = raidget_component_label(raidPtr, sparecol);
3371 clabel->autoconfigure = new_value;
3372 raidflush_component_label(raidPtr, sparecol);
3373 }
3374 }
3375 return(new_value);
3376 }
3377
3378 int
3379 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3380 {
3381 RF_ComponentLabel_t *clabel;
3382 int column;
3383 int sparecol;
3384
3385 raidPtr->root_partition = new_value;
3386 for(column=0; column<raidPtr->numCol; column++) {
3387 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3388 clabel = raidget_component_label(raidPtr, column);
3389 clabel->root_partition = new_value;
3390 raidflush_component_label(raidPtr, column);
3391 }
3392 }
3393 for(column = 0; column < raidPtr->numSpare ; column++) {
3394 sparecol = raidPtr->numCol + column;
3395 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3396 clabel = raidget_component_label(raidPtr, sparecol);
3397 clabel->root_partition = new_value;
3398 raidflush_component_label(raidPtr, sparecol);
3399 }
3400 }
3401 return(new_value);
3402 }
3403
3404 void
3405 rf_release_all_vps(RF_ConfigSet_t *cset)
3406 {
3407 RF_AutoConfig_t *ac;
3408
3409 ac = cset->ac;
3410 while(ac!=NULL) {
3411 /* Close the vp, and give it back */
3412 if (ac->vp) {
3413 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3414 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3415 vput(ac->vp);
3416 ac->vp = NULL;
3417 }
3418 ac = ac->next;
3419 }
3420 }
3421
3422
3423 void
3424 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3425 {
3426 RF_AutoConfig_t *ac;
3427 RF_AutoConfig_t *next_ac;
3428
3429 ac = cset->ac;
3430 while(ac!=NULL) {
3431 next_ac = ac->next;
3432 /* nuke the label */
3433 free(ac->clabel, M_RAIDFRAME);
3434 /* cleanup the config structure */
3435 free(ac, M_RAIDFRAME);
3436 /* "next.." */
3437 ac = next_ac;
3438 }
3439 /* and, finally, nuke the config set */
3440 free(cset, M_RAIDFRAME);
3441 }
3442
3443
/*
 * Initialize a component label from the current in-core state of the
 * RAID set: version, serial/mod counters, geometry, layout parameters,
 * and the autoconfig/root/ordering preferences.  The label is marked
 * dirty and optimal; the caller is responsible for writing it out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3476
/*
 * Autoconfigure one detected config set: build an RF_Config_t from its
 * component labels, find (or allocate) a raid unit for it, and run the
 * normal configuration path.  Returns the configured softc, or NULL on
 * failure (out of memory, no free unit, or rf_Configure() error).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the label; walk upward past any
	   units that already hold a valid (configured) set. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing softc at the chosen unit: allocate one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the unit we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3560
/*
 * Initialize a RAIDframe resource pool at IPL_BIO: pre-allocate (prime)
 * xmin items so allocations in the I/O path cannot block at first use,
 * and cap the free-item reserve at xmax.  Panics if priming fails, since
 * RAIDframe depends on the guaranteed minimum.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	int error;

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	if ((error = pool_prime(p, xmin)) != 0)
		panic("%s: failed to prime pool: %d", __func__, error);
	pool_setlowat(p, xmin);
}
3573
3574 /*
3575 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3576 * to see if there is IO pending and if that IO could possibly be done
3577 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3578 * otherwise.
3579 *
3580 */
3581 int
3582 rf_buf_queue_check(RF_Raid_t *raidPtr)
3583 {
3584 struct raid_softc *rs;
3585 struct dk_softc *dksc;
3586
3587 rs = raidPtr->softc;
3588 dksc = &rs->sc_dksc;
3589
3590 if ((rs->sc_flags & RAIDF_INITED) == 0)
3591 return 1;
3592
3593 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3594 /* there is work to do */
3595 return 0;
3596 }
3597 /* default is nothing to do */
3598 return 1;
3599 }
3600
3601 int
3602 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3603 {
3604 uint64_t numsecs;
3605 unsigned secsize;
3606 int error;
3607
3608 error = getdisksize(vp, &numsecs, &secsize);
3609 if (error == 0) {
3610 diskPtr->blockSize = secsize;
3611 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3612 diskPtr->partitionSize = numsecs;
3613 return 0;
3614 }
3615 return error;
3616 }
3617
/* Autoconf match: raid pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3623
/* Autoconf attach: nothing to do; real setup happens at configure time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3628
3629
/*
 * Autoconf detach: take the softc lock, run the unlocked detach worker,
 * and on success release the softc.  Returns ENXIO when no softc is
 * attached, or the error from locking/detaching.
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidsoftc(self);

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXX raid can be referenced here */

	if (error)
		return error;

	/* Free the softc */
	raidput(rs);

	return 0;
}
3656
/*
 * Publish a synthetic disk geometry for the RAID set: total sectors and
 * sector size come from the array; sectors-per-track is the data stripe
 * width and the track count is a fabricated 4 * numCol (the geometry is
 * nominal -- RAID sets have no real CHS layout).
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3672
3673 /*
3674 * Get cache info for all the components (including spares).
3675 * Returns intersection of all the cache flags of all disks, or first
3676 * error if any encountered.
3677 * XXXfua feature flags can change as spares are added - lock down somehow
3678 */
3679 static int
3680 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3681 {
3682 int c;
3683 int error;
3684 int dkwhole = 0, dkpart;
3685
3686 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3687 /*
3688 * Check any non-dead disk, even when currently being
3689 * reconstructed.
3690 */
3691 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3692 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3693 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3694 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3695 if (error) {
3696 if (error != ENODEV) {
3697 printf("raid%d: get cache for component %s failed\n",
3698 raidPtr->raidid,
3699 raidPtr->Disks[c].devname);
3700 }
3701
3702 return error;
3703 }
3704
3705 if (c == 0)
3706 dkwhole = dkpart;
3707 else
3708 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3709 }
3710 }
3711
3712 *data = dkwhole;
3713
3714 return 0;
3715 }
3716
3717 /*
3718 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3719 * We end up returning whatever error was returned by the first cache flush
3720 * that fails.
3721 */
3722
3723 int
3724 rf_sync_component_caches(RF_Raid_t *raidPtr)
3725 {
3726 int c, sparecol;
3727 int e,error;
3728 int force = 1;
3729
3730 error = 0;
3731 for (c = 0; c < raidPtr->numCol; c++) {
3732 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3733 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3734 &force, FWRITE, NOCRED);
3735 if (e) {
3736 if (e != ENODEV)
3737 printf("raid%d: cache flush to component %s failed.\n",
3738 raidPtr->raidid, raidPtr->Disks[c].devname);
3739 if (error == 0) {
3740 error = e;
3741 }
3742 }
3743 }
3744 }
3745
3746 for( c = 0; c < raidPtr->numSpare ; c++) {
3747 sparecol = raidPtr->numCol + c;
3748 /* Need to ensure that the reconstruct actually completed! */
3749 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3750 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3751 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3752 if (e) {
3753 if (e != ENODEV)
3754 printf("raid%d: cache flush to component %s failed.\n",
3755 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3756 if (error == 0) {
3757 error = e;
3758 }
3759 }
3760 }
3761 }
3762 return error;
3763 }
3764
3765 /* Fill in info with the current status */
3766 void
3767 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3768 {
3769
3770 if (raidPtr->status != rf_rs_reconstructing) {
3771 info->total = 100;
3772 info->completed = 100;
3773 } else {
3774 info->total = raidPtr->reconControl->numRUsTotal;
3775 info->completed = raidPtr->reconControl->numRUsComplete;
3776 }
3777 info->remaining = info->total - info->completed;
3778 }
3779
3780 /* Fill in info with the current status */
3781 void
3782 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3783 {
3784
3785 if (raidPtr->parity_rewrite_in_progress == 1) {
3786 info->total = raidPtr->Layout.numStripe;
3787 info->completed = raidPtr->parity_rewrite_stripes_done;
3788 } else {
3789 info->completed = 100;
3790 info->total = 100;
3791 }
3792 info->remaining = info->total - info->completed;
3793 }
3794
3795 /* Fill in info with the current status */
3796 void
3797 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3798 {
3799
3800 if (raidPtr->copyback_in_progress == 1) {
3801 info->total = raidPtr->Layout.numStripe;
3802 info->completed = raidPtr->copyback_stripes_done;
3803 info->remaining = info->total - info->completed;
3804 } else {
3805 info->remaining = 0;
3806 info->completed = 100;
3807 info->total = 100;
3808 }
3809 }
3810
3811 /* Fill in config with the current info */
3812 int
3813 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3814 {
3815 int d, i, j;
3816
3817 if (!raidPtr->valid)
3818 return (ENODEV);
3819 config->cols = raidPtr->numCol;
3820 config->ndevs = raidPtr->numCol;
3821 if (config->ndevs >= RF_MAX_DISKS)
3822 return (ENOMEM);
3823 config->nspares = raidPtr->numSpare;
3824 if (config->nspares >= RF_MAX_DISKS)
3825 return (ENOMEM);
3826 config->maxqdepth = raidPtr->maxQueueDepth;
3827 d = 0;
3828 for (j = 0; j < config->cols; j++) {
3829 config->devs[d] = raidPtr->Disks[j];
3830 d++;
3831 }
3832 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3833 config->spares[i] = raidPtr->Disks[j];
3834 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3835 /* XXX: raidctl(8) expects to see this as a used spare */
3836 config->spares[i].status = rf_ds_used_spare;
3837 }
3838 }
3839 return 0;
3840 }
3841
3842 int
3843 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3844 {
3845 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3846 RF_ComponentLabel_t *raid_clabel;
3847 int column = clabel->column;
3848
3849 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3850 return EINVAL;
3851 raid_clabel = raidget_component_label(raidPtr, column);
3852 memcpy(clabel, raid_clabel, sizeof *clabel);
3853
3854 return 0;
3855 }
3856
3857 /*
3858 * Module interface
3859 */
3860
3861 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3862
3863 #ifdef _MODULE
3864 CFDRIVER_DECL(raid, DV_DISK, NULL);
3865 #endif
3866
3867 static int raid_modcmd(modcmd_t, void *);
3868 static int raid_modcmd_init(void);
3869 static int raid_modcmd_fini(void);
3870
3871 static int
3872 raid_modcmd(modcmd_t cmd, void *data)
3873 {
3874 int error;
3875
3876 error = 0;
3877 switch (cmd) {
3878 case MODULE_CMD_INIT:
3879 error = raid_modcmd_init();
3880 break;
3881 case MODULE_CMD_FINI:
3882 error = raid_modcmd_fini();
3883 break;
3884 default:
3885 error = ENOTTY;
3886 break;
3887 }
3888 return error;
3889 }
3890
/*
 * Module initialization: create the driver lock and sparet primitives,
 * attach the device switch and (for modules) the cfdriver/cfattach,
 * boot the RAIDframe core, and register the autoconfig finalizer.  Each
 * attach step unwinds the earlier ones on failure.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Let the kernel assign the majors; EEXIST means the devsw is
	   already present (e.g. built-in) and is not fatal. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw attach */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back the cfdriver and devsw attaches */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* autoconfig is lost but the driver still works */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3961
/*
 * Module teardown: refuse while raid units exist, then detach the
 * cfattach, cfdriver (module case), and devsw in reverse order of
 * attachment -- re-attaching earlier pieces if a later detach fails --
 * and finally shut down the RAIDframe core and destroy the locks.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* restore the cfattach detached above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* restore the cfdriver/cfattach detached above */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4011