rf_netbsdkintf.c revision 1.365 1 /* $NetBSD: rf_netbsdkintf.c,v 1.365 2019/02/05 09:45:38 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.365 2019/02/05 09:45:38 mrg Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "rf_compat80.h"
153
154 #ifdef COMPAT_NETBSD32
155 #ifdef _LP64
156 #include "rf_compat32.h"
157 #define RAID_COMPAT32
158 #endif
159 #endif
160
161 #include "ioconf.h"
162
163 #ifdef DEBUG
164 int rf_kdebug_level = 0;
165 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
166 #else /* DEBUG */
167 #define db1_printf(a) { }
168 #endif /* DEBUG */
169
170 #ifdef DEBUG_ROOT
171 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
172 #else
173 #define DPRINTF(a, ...)
174 #endif
175
176 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
177 static rf_declare_mutex2(rf_sparet_wait_mutex);
178 static rf_declare_cond2(rf_sparet_wait_cv);
179 static rf_declare_cond2(rf_sparet_resp_cv);
180
181 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
182 * spare table */
183 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
184 * installation process */
185 #endif
186
187 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
188
189 /* prototypes */
190 static void KernelWakeupFunc(struct buf *);
191 static void InitBP(struct buf *, struct vnode *, unsigned,
192 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
193 void *, int, struct proc *);
194 struct raid_softc;
195 static void raidinit(struct raid_softc *);
196 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
197 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
198
199 static int raid_match(device_t, cfdata_t, void *);
200 static void raid_attach(device_t, device_t, void *);
201 static int raid_detach(device_t, int);
202
203 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
204 daddr_t, daddr_t);
205 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
206 daddr_t, daddr_t, int);
207
208 static int raidwrite_component_label(unsigned,
209 dev_t, struct vnode *, RF_ComponentLabel_t *);
210 static int raidread_component_label(unsigned,
211 dev_t, struct vnode *, RF_ComponentLabel_t *);
212
213 static int raid_diskstart(device_t, struct buf *bp);
214 static int raid_dumpblocks(device_t, void *, daddr_t, int);
215 static int raid_lastclose(device_t);
216
217 static dev_type_open(raidopen);
218 static dev_type_close(raidclose);
219 static dev_type_read(raidread);
220 static dev_type_write(raidwrite);
221 static dev_type_ioctl(raidioctl);
222 static dev_type_strategy(raidstrategy);
223 static dev_type_dump(raiddump);
224 static dev_type_size(raidsize);
225
/*
 * Block-device switch for raid(4): routes block-layer entry points
 * (open/close/strategy/ioctl/dump/psize) to the raid* handlers below.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,		/* discard not supported */
	.d_flag = D_DISK		/* behaves as a disk device */
};
236
/*
 * Character-device (raw) switch for raid(4): read/write go through
 * physio() via raidread/raidwrite; tty/poll/mmap are stubbed out.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
251
/*
 * Glue for the common dk(9) disk framework: lets dk_* helpers call
 * back into the RAIDframe driver for start/dump/last-close handling.
 */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,		/* issue one queued buf */
	.d_dumpblocks = raid_dumpblocks,	/* crash-dump support */
	.d_lastclose = raid_lastclose,		/* final-close label update */
	.d_minphys = minphys
};
261
/*
 * Per-unit software state for a RAIDframe device.  Units live on the
 * global `raids' list and are created/looked up via raidget().
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* common dk(9) state; must be first */
	int sc_unit;			/* raid unit number */
	int sc_flags;			/* flags */
	int sc_cflags;			/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* the RAIDframe descriptor proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
/* sc_flags */
#define RAIDF_INITED		0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
#define RAIDF_DETACH		0x04	/* detach after final close */
#define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED		0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */
281
282 #define raidunit(x) DISKUNIT(x)
283 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
284
285 extern struct cfdriver raid_cd;
286 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
287 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
288 DVF_DETACH_SHUTDOWN);
289
/*
 * Internal representation of a rf_recon_req: the user-visible request
 * converted to kernel pointers before being handed to a recon thread.
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column to reconstruct/fail */
	RF_ReconReqFlags_t flags;	/* request flags */
	void *raidPtr;			/* RF_Raid_t of the target set */
};
296
297 /*
298 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
299 * Be aware that large numbers can allow the driver to consume a lot of
300 * kernel memory, especially on writes, and in degraded mode reads.
301 *
302 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
303 * a single 64K write will typically require 64K for the old data,
304 * 64K for the old parity, and 64K for the new parity, for a total
305 * of 192K (if the parity buffer is not re-used immediately).
306 * Even it if is used immediately, that's still 128K, which when multiplied
307 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
308 *
309 * Now in degraded mode, for example, a 64K read on the above setup may
310 * require data reconstruction, which will require *all* of the 4 remaining
311 * disks to participate -- 4 * 32K/disk == 128K again.
312 */
313
314 #ifndef RAIDOUTSTANDING
315 #define RAIDOUTSTANDING 6
316 #endif
317
318 #define RAIDLABELDEV(dev) \
319 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
320
321 /* declared here, and made public, for the benefit of KVM stuff.. */
322
323 static int raidlock(struct raid_softc *);
324 static void raidunlock(struct raid_softc *);
325
326 static int raid_detach_unlocked(struct raid_softc *);
327
328 static void rf_markalldirty(RF_Raid_t *);
329 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
330
331 void rf_ReconThread(struct rf_recon_req_internal *);
332 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
333 void rf_CopybackThread(RF_Raid_t *raidPtr);
334 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
335 int rf_autoconfig(device_t);
336 void rf_buildroothack(RF_ConfigSet_t *);
337
338 RF_AutoConfig_t *rf_find_raid_components(void);
339 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
340 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
341 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
342 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
343 int rf_set_autoconfig(RF_Raid_t *, int);
344 int rf_set_rootpartition(RF_Raid_t *, int);
345 void rf_release_all_vps(RF_ConfigSet_t *);
346 void rf_cleanup_config_set(RF_ConfigSet_t *);
347 int rf_have_enough_components(RF_ConfigSet_t *);
348 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
349 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
350
351 /*
352 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
353 * Note that this is overridden by having RAID_AUTOCONFIG as an option
354 * in the kernel config file.
355 */
356 #ifdef RAID_AUTOCONFIG
357 int raidautoconfig = 1;
358 #else
359 int raidautoconfig = 0;
360 #endif
361 static bool raidautoconfigdone = false;
362
363 struct RF_Pools_s rf_pools;
364
365 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
366 static kmutex_t raid_lock;
367
368 static struct raid_softc *
369 raidcreate(int unit) {
370 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
371 sc->sc_unit = unit;
372 cv_init(&sc->sc_cv, "raidunit");
373 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
374 return sc;
375 }
376
377 static void
378 raiddestroy(struct raid_softc *sc) {
379 cv_destroy(&sc->sc_cv);
380 mutex_destroy(&sc->sc_mutex);
381 kmem_free(sc, sizeof(*sc));
382 }
383
384 static struct raid_softc *
385 raidget(int unit, bool create) {
386 struct raid_softc *sc;
387 if (unit < 0) {
388 #ifdef DIAGNOSTIC
389 panic("%s: unit %d!", __func__, unit);
390 #endif
391 return NULL;
392 }
393 mutex_enter(&raid_lock);
394 LIST_FOREACH(sc, &raids, sc_link) {
395 if (sc->sc_unit == unit) {
396 mutex_exit(&raid_lock);
397 return sc;
398 }
399 }
400 mutex_exit(&raid_lock);
401 if (!create)
402 return NULL;
403 if ((sc = raidcreate(unit)) == NULL)
404 return NULL;
405 mutex_enter(&raid_lock);
406 LIST_INSERT_HEAD(&raids, sc, sc_link);
407 mutex_exit(&raid_lock);
408 return sc;
409 }
410
411 static void
412 raidput(struct raid_softc *sc) {
413 mutex_enter(&raid_lock);
414 LIST_REMOVE(sc, sc_link);
415 mutex_exit(&raid_lock);
416 raiddestroy(sc);
417 }
418
/*
 * Legacy pseudo-device attach entry point.  Intentionally empty:
 * attachment and initialization happen during module initialization
 * instead, so there is nothing to do here.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
428
/*
 * Autoconfigure RAID sets at boot: scan all disks for RAIDframe
 * component labels, group them into sets, and configure the valid
 * ones.  Runs at most once; returns 1 if a scan was performed,
 * 0 if autoconfiguration is disabled or already done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
466
/*
 * Return 1 if the RAID set `r' contains the boot device `bdv' as one
 * of its components, 0 otherwise.  Component names are compared after
 * stripping a "/dev/" prefix; for dk wedges the wedge's parent disk
 * name is used instead.
 *
 * NOTE(review): the strncmp() below compares only strlen(bootname)
 * characters, i.e. a prefix match — "wd1" would also match "wd10".
 * Presumably acceptable in practice; confirm before relying on it.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip over the "/dev/" part of the component name */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge: compare against the parent disk's name */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
497
498 void
499 rf_buildroothack(RF_ConfigSet_t *config_sets)
500 {
501 RF_ConfigSet_t *cset;
502 RF_ConfigSet_t *next_cset;
503 int num_root;
504 struct raid_softc *sc, *rsc;
505 struct dk_softc *dksc;
506
507 sc = rsc = NULL;
508 num_root = 0;
509 cset = config_sets;
510 while (cset != NULL) {
511 next_cset = cset->next;
512 if (rf_have_enough_components(cset) &&
513 cset->ac->clabel->autoconfigure == 1) {
514 sc = rf_auto_config_set(cset);
515 if (sc != NULL) {
516 aprint_debug("raid%d: configured ok, rootable %d\n",
517 sc->sc_unit, cset->rootable);
518 if (cset->rootable) {
519 rsc = sc;
520 num_root++;
521 }
522 } else {
523 /* The autoconfig didn't work :( */
524 aprint_debug("Autoconfig failed\n");
525 rf_release_all_vps(cset);
526 }
527 } else {
528 /* we're not autoconfiguring this set...
529 release the associated resources */
530 rf_release_all_vps(cset);
531 }
532 /* cleanup */
533 rf_cleanup_config_set(cset);
534 cset = next_cset;
535 }
536 dksc = &rsc->sc_dksc;
537
538 /* if the user has specified what the root device should be
539 then we don't touch booted_device or boothowto... */
540
541 if (rootspec != NULL) {
542 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
543 return;
544 }
545
546 /* we found something bootable... */
547
548 /*
549 * XXX: The following code assumes that the root raid
550 * is the first ('a') partition. This is about the best
551 * we can do with a BSD disklabel, but we might be able
552 * to do better with a GPT label, by setting a specified
553 * attribute to indicate the root partition. We can then
554 * stash the partition number in the r->root_partition
555 * high bits (the bottom 2 bits are already used). For
556 * now we just set booted_partition to 0 when we override
557 * root.
558 */
559 if (num_root == 1) {
560 device_t candidate_root;
561 if (dksc->sc_dkdev.dk_nwedges != 0) {
562 char cname[sizeof(cset->ac->devname)];
563 /* XXX: assume partition 'a' first */
564 snprintf(cname, sizeof(cname), "%s%c",
565 device_xname(dksc->sc_dev), 'a');
566 candidate_root = dkwedge_find_by_wname(cname);
567 DPRINTF("%s: candidate wedge root=%s\n", __func__,
568 cname);
569 if (candidate_root == NULL) {
570 /*
571 * If that is not found, because we don't use
572 * disklabel, return the first dk child
573 * XXX: we can skip the 'a' check above
574 * and always do this...
575 */
576 size_t i = 0;
577 candidate_root = dkwedge_find_by_parent(
578 device_xname(dksc->sc_dev), &i);
579 }
580 DPRINTF("%s: candidate wedge root=%p\n", __func__,
581 candidate_root);
582 } else
583 candidate_root = dksc->sc_dev;
584 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
585 DPRINTF("%s: booted_device=%p root_partition=%d "
586 "contains_boot=%d",
587 __func__, booted_device, rsc->sc_r.root_partition,
588 rf_containsboot(&rsc->sc_r, booted_device));
589 /* XXX the check for booted_device == NULL can probably be
590 * dropped, now that rf_containsboot handles that case.
591 */
592 if (booted_device == NULL ||
593 rsc->sc_r.root_partition == 1 ||
594 rf_containsboot(&rsc->sc_r, booted_device)) {
595 booted_device = candidate_root;
596 booted_method = "raidframe/single";
597 booted_partition = 0; /* XXX assume 'a' */
598 }
599 } else if (num_root > 1) {
600 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
601 booted_device);
602
603 /*
604 * Maybe the MD code can help. If it cannot, then
605 * setroot() will discover that we have no
606 * booted_device and will ask the user if nothing was
607 * hardwired in the kernel config file
608 */
609 if (booted_device == NULL)
610 return;
611
612 num_root = 0;
613 mutex_enter(&raid_lock);
614 LIST_FOREACH(sc, &raids, sc_link) {
615 RF_Raid_t *r = &sc->sc_r;
616 if (r->valid == 0)
617 continue;
618
619 if (r->root_partition == 0)
620 continue;
621
622 if (rf_containsboot(r, booted_device)) {
623 num_root++;
624 rsc = sc;
625 dksc = &rsc->sc_dksc;
626 }
627 }
628 mutex_exit(&raid_lock);
629
630 if (num_root == 1) {
631 booted_device = dksc->sc_dev;
632 booted_method = "raidframe/multi";
633 booted_partition = 0; /* XXX assume 'a' */
634 } else {
635 /* we can't guess.. require the user to answer... */
636 boothowto |= RB_ASKNAME;
637 }
638 }
639 }
640
641 static int
642 raidsize(dev_t dev)
643 {
644 struct raid_softc *rs;
645 struct dk_softc *dksc;
646 unsigned int unit;
647
648 unit = raidunit(dev);
649 if ((rs = raidget(unit, false)) == NULL)
650 return -1;
651 dksc = &rs->sc_dksc;
652
653 if ((rs->sc_flags & RAIDF_INITED) == 0)
654 return -1;
655
656 return dk_size(dksc, dev);
657 }
658
659 static int
660 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
661 {
662 unsigned int unit;
663 struct raid_softc *rs;
664 struct dk_softc *dksc;
665
666 unit = raidunit(dev);
667 if ((rs = raidget(unit, false)) == NULL)
668 return ENXIO;
669 dksc = &rs->sc_dksc;
670
671 if ((rs->sc_flags & RAIDF_INITED) == 0)
672 return ENODEV;
673
674 /*
675 Note that blkno is relative to this particular partition.
676 By adding adding RF_PROTECTED_SECTORS, we get a value that
677 is relative to the partition used for the underlying component.
678 */
679 blkno += RF_PROTECTED_SECTORS;
680
681 return dk_dump(dksc, dev, blkno, va, size);
682 }
683
/*
 * dk(9) dumpblocks callback: write `nblk' blocks at `blkno' from `va'
 * directly to one live component of a RAID 1 set.  Only 1+1 (RAID 1)
 * layouts are supported; other layouts return EINVAL.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	/* hold the unit steady while we pick a component and dump */
	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* find which column this spare is standing in for */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* hand the dump straight to the chosen component's driver */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
789
/* ARGSUSED */
/*
 * Open a raid device.  Creates the softc on first reference; the
 * actual open is delegated to dk_open() once the unit is configured.
 * The first open of a configured, idle unit marks all components
 * dirty so parity state is tracked correctly.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens while the unit is shutting down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
839
840 static int
841 raid_lastclose(device_t self)
842 {
843 struct raid_softc *rs = raidsoftc(self);
844
845 /* Last one... device is not unconfigured yet.
846 Device shutdown has taken care of setting the
847 clean bits if RAIDF_INITED is not set
848 mark things as clean... */
849
850 rf_update_component_labels(&rs->sc_r,
851 RF_FINAL_COMPONENT_UPDATE);
852
853 /* pass to unlocked code */
854 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
855 rs->sc_flags |= RAIDF_DETACH;
856
857 return 0;
858 }
859
/* ARGSUSED */
/*
 * Close a raid device.  Delegates to dk_close() for configured units;
 * afterwards (outside the unit lock) performs any deferred detach of
 * the pseudo-device, or frees an unconfigured softc that was marked
 * for shutdown.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have set RAIDF_DETACH */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* detach/free must happen after dropping the unit lock */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
899
900 static void
901 raid_wakeup(RF_Raid_t *raidPtr)
902 {
903 rf_lock_mutex2(raidPtr->iodone_lock);
904 rf_signal_cond2(raidPtr->iodone_cv);
905 rf_unlock_mutex2(raidPtr->iodone_lock);
906 }
907
908 static void
909 raidstrategy(struct buf *bp)
910 {
911 unsigned int unit;
912 struct raid_softc *rs;
913 struct dk_softc *dksc;
914 RF_Raid_t *raidPtr;
915
916 unit = raidunit(bp->b_dev);
917 if ((rs = raidget(unit, false)) == NULL) {
918 bp->b_error = ENXIO;
919 goto fail;
920 }
921 if ((rs->sc_flags & RAIDF_INITED) == 0) {
922 bp->b_error = ENXIO;
923 goto fail;
924 }
925 dksc = &rs->sc_dksc;
926 raidPtr = &rs->sc_r;
927
928 /* Queue IO only */
929 if (dk_strategy_defer(dksc, bp))
930 goto done;
931
932 /* schedule the IO to happen at the next convenient time */
933 raid_wakeup(raidPtr);
934
935 done:
936 return;
937
938 fail:
939 bp->b_resid = bp->b_bcount;
940 biodone(bp);
941 }
942
943 static int
944 raid_diskstart(device_t dev, struct buf *bp)
945 {
946 struct raid_softc *rs = raidsoftc(dev);
947 RF_Raid_t *raidPtr;
948
949 raidPtr = &rs->sc_r;
950 if (!raidPtr->valid) {
951 db1_printf(("raid is not valid..\n"));
952 return ENODEV;
953 }
954
955 /* XXX */
956 bp->b_resid = 0;
957
958 return raiddoaccess(raidPtr, bp);
959 }
960
961 void
962 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
963 {
964 struct raid_softc *rs;
965 struct dk_softc *dksc;
966
967 rs = raidPtr->softc;
968 dksc = &rs->sc_dksc;
969
970 dk_done(dksc, bp);
971
972 rf_lock_mutex2(raidPtr->mutex);
973 raidPtr->openings++;
974 rf_unlock_mutex2(raidPtr->mutex);
975
976 /* schedule more IO */
977 raid_wakeup(raidPtr);
978 }
979
980 /* ARGSUSED */
981 static int
982 raidread(dev_t dev, struct uio *uio, int flags)
983 {
984 int unit = raidunit(dev);
985 struct raid_softc *rs;
986
987 if ((rs = raidget(unit, false)) == NULL)
988 return ENXIO;
989
990 if ((rs->sc_flags & RAIDF_INITED) == 0)
991 return (ENXIO);
992
993 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
994
995 }
996
997 /* ARGSUSED */
998 static int
999 raidwrite(dev_t dev, struct uio *uio, int flags)
1000 {
1001 int unit = raidunit(dev);
1002 struct raid_softc *rs;
1003
1004 if ((rs = raidget(unit, false)) == NULL)
1005 return ENXIO;
1006
1007 if ((rs->sc_flags & RAIDF_INITED) == 0)
1008 return (ENXIO);
1009
1010 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1011
1012 }
1013
/*
 * Unconfigure a raid unit: shut down RAIDframe, drain queued IO, and
 * detach the disk from the dk(9)/disk(9) frameworks.  Caller holds
 * the unit lock.  Returns EBUSY if the device is open or a recon,
 * parity rewrite, or copyback is in progress; 0 if already
 * unconfigured or on success.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* refuse while busy or while background operations run */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1051
1052 static int
1053 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1054 {
1055 int unit = raidunit(dev);
1056 int error = 0;
1057 int part, pmask;
1058 struct raid_softc *rs;
1059 struct dk_softc *dksc;
1060 RF_Config_t *k_cfg, *u_cfg;
1061 RF_Raid_t *raidPtr;
1062 RF_RaidDisk_t *diskPtr;
1063 RF_AccTotals_t *totals;
1064 RF_DeviceConfig_t *d_cfg, *ucfgp;
1065 u_char *specific_buf;
1066 int retcode = 0;
1067 int column;
1068 /* int raidid; */
1069 struct rf_recon_req *rr;
1070 struct rf_recon_req_internal *rrint;
1071 RF_ComponentLabel_t *clabel;
1072 RF_ComponentLabel_t *ci_label;
1073 RF_SingleComponent_t *sparePtr,*componentPtr;
1074 RF_SingleComponent_t component;
1075 int d;
1076
1077 if ((rs = raidget(unit, false)) == NULL)
1078 return ENXIO;
1079 dksc = &rs->sc_dksc;
1080 raidPtr = &rs->sc_r;
1081
1082 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1083 (int) DISKPART(dev), (int) unit, cmd));
1084
1085 /* Must be initialized for these... */
1086 switch (cmd) {
1087 case RAIDFRAME_REWRITEPARITY:
1088 case RAIDFRAME_GET_INFO:
1089 case RAIDFRAME_RESET_ACCTOTALS:
1090 case RAIDFRAME_GET_ACCTOTALS:
1091 case RAIDFRAME_KEEP_ACCTOTALS:
1092 case RAIDFRAME_GET_SIZE:
1093 case RAIDFRAME_FAIL_DISK:
1094 case RAIDFRAME_COPYBACK:
1095 case RAIDFRAME_CHECK_RECON_STATUS:
1096 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1097 case RAIDFRAME_GET_COMPONENT_LABEL:
1098 case RAIDFRAME_SET_COMPONENT_LABEL:
1099 case RAIDFRAME_ADD_HOT_SPARE:
1100 case RAIDFRAME_REMOVE_HOT_SPARE:
1101 case RAIDFRAME_INIT_LABELS:
1102 case RAIDFRAME_REBUILD_IN_PLACE:
1103 case RAIDFRAME_CHECK_PARITY:
1104 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1105 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1106 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1107 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1108 case RAIDFRAME_SET_AUTOCONFIG:
1109 case RAIDFRAME_SET_ROOT:
1110 case RAIDFRAME_DELETE_COMPONENT:
1111 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1112 case RAIDFRAME_PARITYMAP_STATUS:
1113 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1114 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1115 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1116 #ifdef RAID_COMPAT32
1117 case RAIDFRAME_GET_INFO32:
1118 #endif
1119 if ((rs->sc_flags & RAIDF_INITED) == 0)
1120 return (ENXIO);
1121 }
1122
1123 /*
1124 * Handle compat ioctl calls
1125 *
1126 * * If compat code is not loaded, stub returns ENOSYS and we just
1127 * check the "native" cmd's
1128 * * If compat code is loaded but does not recognize the cmd, it
1129 * returns EPASSTHROUGH, and we just check the "native" cmd's
1130 * * If compat code returns EAGAIN, we need to finish via config
1131 * * Otherwise the cmd has been handled and we just return
1132 */
1133 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1134 MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
1135 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1136 enosys(), retcode);
1137 if (retcode == ENOSYS)
1138 retcode = 0;
1139 else if (retcode == EAGAIN)
1140 goto config;
1141 else if (retcode != EPASSTHROUGH)
1142 return retcode;
1143
1144 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1145 MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
1146 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1147 enosys(), retcode);
1148 if (retcode == ENOSYS)
1149 retcode = 0;
1150 else if (retcode == EAGAIN)
1151 goto config;
1152 else if (retcode != EPASSTHROUGH)
1153 return retcode;
1154
1155 /*
1156 * XXX
1157 * Handling of FAIL_DISK80 command requires us to retain retcode's
1158 * value of EPASSTHROUGH. If you add more compat code later, make
1159 * sure you don't overwrite retcode and break this!
1160 */
1161
1162 switch (cmd) {
1163
1164 /* configure the system */
1165 case RAIDFRAME_CONFIGURE:
1166 #ifdef RAID_COMPAT32
1167 case RAIDFRAME_CONFIGURE32:
1168 #endif
1169
1170 if (raidPtr->valid) {
1171 /* There is a valid RAID set running on this unit! */
1172 printf("raid%d: Device already configured!\n",unit);
1173 return(EINVAL);
1174 }
1175
1176 /* copy-in the configuration information */
1177 /* data points to a pointer to the configuration structure */
1178
1179 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1180 if (k_cfg == NULL) {
1181 return (ENOMEM);
1182 }
1183 #ifdef RAID_COMPAT32
1184 if (cmd == RAIDFRAME_CONFIGURE32 &&
1185 (l->l_proc->p_flag & PK_32) != 0)
1186 MODULE_CALL_HOOK(raidframe_netbsd32_config_hook,
1187 (data, k_cfg), enosys(), retcode);
1188 else
1189 #endif
1190 {
1191 u_cfg = *((RF_Config_t **) data);
1192 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1193 }
1194 if (retcode) {
1195 RF_Free(k_cfg, sizeof(RF_Config_t));
1196 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1197 retcode));
1198 goto no_config;
1199 }
1200 goto config;
1201 config:
1202 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1203
1204 /* allocate a buffer for the layout-specific data, and copy it
1205 * in */
1206 if (k_cfg->layoutSpecificSize) {
1207 if (k_cfg->layoutSpecificSize > 10000) {
1208 /* sanity check */
1209 RF_Free(k_cfg, sizeof(RF_Config_t));
1210 retcode = EINVAL;
1211 goto no_config;
1212 }
1213 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1214 (u_char *));
1215 if (specific_buf == NULL) {
1216 RF_Free(k_cfg, sizeof(RF_Config_t));
1217 retcode = ENOMEM;
1218 goto no_config;
1219 }
1220 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1221 k_cfg->layoutSpecificSize);
1222 if (retcode) {
1223 RF_Free(k_cfg, sizeof(RF_Config_t));
1224 RF_Free(specific_buf,
1225 k_cfg->layoutSpecificSize);
1226 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1227 retcode));
1228 goto no_config;
1229 }
1230 } else
1231 specific_buf = NULL;
1232 k_cfg->layoutSpecific = specific_buf;
1233
1234 /* should do some kind of sanity check on the configuration.
1235 * Store the sum of all the bytes in the last byte? */
1236
1237 /* configure the system */
1238
1239 /*
1240 * Clear the entire RAID descriptor, just to make sure
1241 * there is no stale data left in the case of a
1242 * reconfiguration
1243 */
1244 memset(raidPtr, 0, sizeof(*raidPtr));
1245 raidPtr->softc = rs;
1246 raidPtr->raidid = unit;
1247
1248 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1249
1250 if (retcode == 0) {
1251
1252 /* allow this many simultaneous IO's to
1253 this RAID device */
1254 raidPtr->openings = RAIDOUTSTANDING;
1255
1256 raidinit(rs);
1257 raid_wakeup(raidPtr);
1258 rf_markalldirty(raidPtr);
1259 }
1260 /* free the buffers. No return code here. */
1261 if (k_cfg->layoutSpecificSize) {
1262 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1263 }
1264 RF_Free(k_cfg, sizeof(RF_Config_t));
1265
1266 no_config:
1267 /*
1268 * If configuration failed, set sc_flags so that we
1269 * will detach the device when we close it.
1270 */
1271 if (retcode != 0)
1272 rs->sc_flags |= RAIDF_SHUTDOWN;
1273 return (retcode);
1274
1275 /* shutdown the system */
1276 case RAIDFRAME_SHUTDOWN:
1277
1278 part = DISKPART(dev);
1279 pmask = (1 << part);
1280
1281 if ((error = raidlock(rs)) != 0)
1282 return (error);
1283
1284 if (DK_BUSY(dksc, pmask) ||
1285 raidPtr->recon_in_progress != 0 ||
1286 raidPtr->parity_rewrite_in_progress != 0 ||
1287 raidPtr->copyback_in_progress != 0)
1288 retcode = EBUSY;
1289 else {
1290 /* detach and free on close */
1291 rs->sc_flags |= RAIDF_SHUTDOWN;
1292 retcode = 0;
1293 }
1294
1295 raidunlock(rs);
1296
1297 return (retcode);
1298 case RAIDFRAME_GET_COMPONENT_LABEL:
1299 return rf_get_component_label(raidPtr, data);
1300
1301 #if 0
1302 case RAIDFRAME_SET_COMPONENT_LABEL:
1303 clabel = (RF_ComponentLabel_t *) data;
1304
1305 /* XXX check the label for valid stuff... */
1306 /* Note that some things *should not* get modified --
1307 the user should be re-initing the labels instead of
1308 trying to patch things.
1309 */
1310
1311 raidid = raidPtr->raidid;
1312 #ifdef DEBUG
1313 printf("raid%d: Got component label:\n", raidid);
1314 printf("raid%d: Version: %d\n", raidid, clabel->version);
1315 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1316 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1317 printf("raid%d: Column: %d\n", raidid, clabel->column);
1318 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1319 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1320 printf("raid%d: Status: %d\n", raidid, clabel->status);
1321 #endif /* DEBUG */
1322 clabel->row = 0;
1323 column = clabel->column;
1324
1325 if ((column < 0) || (column >= raidPtr->numCol)) {
1326 return(EINVAL);
1327 }
1328
1329 /* XXX this isn't allowed to do anything for now :-) */
1330
1331 /* XXX and before it is, we need to fill in the rest
1332 of the fields!?!?!?! */
1333 memcpy(raidget_component_label(raidPtr, column),
1334 clabel, sizeof(*clabel));
1335 raidflush_component_label(raidPtr, column);
1336 return (0);
1337 #endif /* 0 */
1338
1339 case RAIDFRAME_INIT_LABELS:
1340 clabel = (RF_ComponentLabel_t *) data;
1341 /*
1342 we only want the serial number from
1343 the above. We get all the rest of the information
1344 from the config that was used to create this RAID
1345 set.
1346 */
1347
1348 raidPtr->serial_number = clabel->serial_number;
1349
1350 for(column=0;column<raidPtr->numCol;column++) {
1351 diskPtr = &raidPtr->Disks[column];
1352 if (!RF_DEAD_DISK(diskPtr->status)) {
1353 ci_label = raidget_component_label(raidPtr,
1354 column);
1355 /* Zeroing this is important. */
1356 memset(ci_label, 0, sizeof(*ci_label));
1357 raid_init_component_label(raidPtr, ci_label);
1358 ci_label->serial_number =
1359 raidPtr->serial_number;
1360 ci_label->row = 0; /* we dont' pretend to support more */
1361 rf_component_label_set_partitionsize(ci_label,
1362 diskPtr->partitionSize);
1363 ci_label->column = column;
1364 raidflush_component_label(raidPtr, column);
1365 }
1366 /* XXXjld what about the spares? */
1367 }
1368
1369 return (retcode);
1370 case RAIDFRAME_SET_AUTOCONFIG:
1371 d = rf_set_autoconfig(raidPtr, *(int *) data);
1372 printf("raid%d: New autoconfig value is: %d\n",
1373 raidPtr->raidid, d);
1374 *(int *) data = d;
1375 return (retcode);
1376
1377 case RAIDFRAME_SET_ROOT:
1378 d = rf_set_rootpartition(raidPtr, *(int *) data);
1379 printf("raid%d: New rootpartition value is: %d\n",
1380 raidPtr->raidid, d);
1381 *(int *) data = d;
1382 return (retcode);
1383
1384 /* initialize all parity */
1385 case RAIDFRAME_REWRITEPARITY:
1386
1387 if (raidPtr->Layout.map->faultsTolerated == 0) {
1388 /* Parity for RAID 0 is trivially correct */
1389 raidPtr->parity_good = RF_RAID_CLEAN;
1390 return(0);
1391 }
1392
1393 if (raidPtr->parity_rewrite_in_progress == 1) {
1394 /* Re-write is already in progress! */
1395 return(EINVAL);
1396 }
1397
1398 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1399 rf_RewriteParityThread,
1400 raidPtr,"raid_parity");
1401 return (retcode);
1402
1403
1404 case RAIDFRAME_ADD_HOT_SPARE:
1405 sparePtr = (RF_SingleComponent_t *) data;
1406 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1407 retcode = rf_add_hot_spare(raidPtr, &component);
1408 return(retcode);
1409
1410 case RAIDFRAME_REMOVE_HOT_SPARE:
1411 return(retcode);
1412
1413 case RAIDFRAME_DELETE_COMPONENT:
1414 componentPtr = (RF_SingleComponent_t *)data;
1415 memcpy( &component, componentPtr,
1416 sizeof(RF_SingleComponent_t));
1417 retcode = rf_delete_component(raidPtr, &component);
1418 return(retcode);
1419
1420 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1421 componentPtr = (RF_SingleComponent_t *)data;
1422 memcpy( &component, componentPtr,
1423 sizeof(RF_SingleComponent_t));
1424 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1425 return(retcode);
1426
1427 case RAIDFRAME_REBUILD_IN_PLACE:
1428
1429 if (raidPtr->Layout.map->faultsTolerated == 0) {
1430 /* Can't do this on a RAID 0!! */
1431 return(EINVAL);
1432 }
1433
1434 if (raidPtr->recon_in_progress == 1) {
1435 /* a reconstruct is already in progress! */
1436 return(EINVAL);
1437 }
1438
1439 componentPtr = (RF_SingleComponent_t *) data;
1440 memcpy( &component, componentPtr,
1441 sizeof(RF_SingleComponent_t));
1442 component.row = 0; /* we don't support any more */
1443 column = component.column;
1444
1445 if ((column < 0) || (column >= raidPtr->numCol)) {
1446 return(EINVAL);
1447 }
1448
1449 rf_lock_mutex2(raidPtr->mutex);
1450 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1451 (raidPtr->numFailures > 0)) {
1452 /* XXX 0 above shouldn't be constant!!! */
1453 /* some component other than this has failed.
1454 Let's not make things worse than they already
1455 are... */
1456 printf("raid%d: Unable to reconstruct to disk at:\n",
1457 raidPtr->raidid);
1458 printf("raid%d: Col: %d Too many failures.\n",
1459 raidPtr->raidid, column);
1460 rf_unlock_mutex2(raidPtr->mutex);
1461 return (EINVAL);
1462 }
1463 if (raidPtr->Disks[column].status ==
1464 rf_ds_reconstructing) {
1465 printf("raid%d: Unable to reconstruct to disk at:\n",
1466 raidPtr->raidid);
1467 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1468
1469 rf_unlock_mutex2(raidPtr->mutex);
1470 return (EINVAL);
1471 }
1472 if (raidPtr->Disks[column].status == rf_ds_spared) {
1473 rf_unlock_mutex2(raidPtr->mutex);
1474 return (EINVAL);
1475 }
1476 rf_unlock_mutex2(raidPtr->mutex);
1477
1478 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1479 if (rrint == NULL)
1480 return(ENOMEM);
1481
1482 rrint->col = column;
1483 rrint->raidPtr = raidPtr;
1484
1485 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1486 rf_ReconstructInPlaceThread,
1487 rrint, "raid_reconip");
1488 return(retcode);
1489
1490 case RAIDFRAME_GET_INFO:
1491 #ifdef RAID_COMPAT32
1492 case RAIDFRAME_GET_INFO32:
1493 #endif
1494 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1495 (RF_DeviceConfig_t *));
1496 if (d_cfg == NULL)
1497 return (ENOMEM);
1498 retcode = rf_get_info(raidPtr, d_cfg);
1499 if (retcode == 0) {
1500 #ifdef RAID_COMPAT32
1501 if (raidframe_netbsd32_config_hook.hooked &&
1502 cmd == RAIDFRAME_GET_INFO32)
1503 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1504 else
1505 #endif
1506 ucfgp = *(RF_DeviceConfig_t **)data;
1507 retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1508 }
1509 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1510
1511 return (retcode);
1512
1513 case RAIDFRAME_CHECK_PARITY:
1514 *(int *) data = raidPtr->parity_good;
1515 return (0);
1516
1517 case RAIDFRAME_PARITYMAP_STATUS:
1518 if (rf_paritymap_ineligible(raidPtr))
1519 return EINVAL;
1520 rf_paritymap_status(raidPtr->parity_map,
1521 (struct rf_pmstat *)data);
1522 return 0;
1523
1524 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1525 if (rf_paritymap_ineligible(raidPtr))
1526 return EINVAL;
1527 if (raidPtr->parity_map == NULL)
1528 return ENOENT; /* ??? */
1529 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1530 (struct rf_pmparams *)data, 1))
1531 return EINVAL;
1532 return 0;
1533
1534 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1535 if (rf_paritymap_ineligible(raidPtr))
1536 return EINVAL;
1537 *(int *) data = rf_paritymap_get_disable(raidPtr);
1538 return 0;
1539
1540 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1541 if (rf_paritymap_ineligible(raidPtr))
1542 return EINVAL;
1543 rf_paritymap_set_disable(raidPtr, *(int *)data);
1544 /* XXX should errors be passed up? */
1545 return 0;
1546
1547 case RAIDFRAME_RESET_ACCTOTALS:
1548 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1549 return (0);
1550
1551 case RAIDFRAME_GET_ACCTOTALS:
1552 totals = (RF_AccTotals_t *) data;
1553 *totals = raidPtr->acc_totals;
1554 return (0);
1555
1556 case RAIDFRAME_KEEP_ACCTOTALS:
1557 raidPtr->keep_acc_totals = *(int *)data;
1558 return (0);
1559
1560 case RAIDFRAME_GET_SIZE:
1561 *(int *) data = raidPtr->totalSectors;
1562 return (0);
1563
1564 /* fail a disk & optionally start reconstruction */
1565 case RAIDFRAME_FAIL_DISK80:
1566 /* Check if we called compat code for this cmd */
1567 if (retcode != EPASSTHROUGH)
1568 return EINVAL;
1569 /* FALLTHRU */
1570 case RAIDFRAME_FAIL_DISK:
1571 if (raidPtr->Layout.map->faultsTolerated == 0) {
1572 /* Can't do this on a RAID 0!! */
1573 return(EINVAL);
1574 }
1575
1576 rr = (struct rf_recon_req *) data;
1577 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1578 return (EINVAL);
1579
1580 rf_lock_mutex2(raidPtr->mutex);
1581 if (raidPtr->status == rf_rs_reconstructing) {
1582 /* you can't fail a disk while we're reconstructing! */
1583 /* XXX wrong for RAID6 */
1584 rf_unlock_mutex2(raidPtr->mutex);
1585 return (EINVAL);
1586 }
1587 if ((raidPtr->Disks[rr->col].status ==
1588 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1589 /* some other component has failed. Let's not make
1590 things worse. XXX wrong for RAID6 */
1591 rf_unlock_mutex2(raidPtr->mutex);
1592 return (EINVAL);
1593 }
1594 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1595 /* Can't fail a spared disk! */
1596 rf_unlock_mutex2(raidPtr->mutex);
1597 return (EINVAL);
1598 }
1599 rf_unlock_mutex2(raidPtr->mutex);
1600
1601 /* make a copy of the recon request so that we don't rely on
1602 * the user's buffer */
1603 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1604 if (rrint == NULL)
1605 return(ENOMEM);
1606 rrint->col = rr->col;
1607 rrint->flags = rr->flags;
1608 rrint->raidPtr = raidPtr;
1609
1610 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1611 rf_ReconThread,
1612 rrint, "raid_recon");
1613 return (0);
1614
1615 /* invoke a copyback operation after recon on whatever disk
1616 * needs it, if any */
1617 case RAIDFRAME_COPYBACK:
1618
1619 if (raidPtr->Layout.map->faultsTolerated == 0) {
1620 /* This makes no sense on a RAID 0!! */
1621 return(EINVAL);
1622 }
1623
1624 if (raidPtr->copyback_in_progress == 1) {
1625 /* Copyback is already in progress! */
1626 return(EINVAL);
1627 }
1628
1629 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1630 rf_CopybackThread,
1631 raidPtr,"raid_copyback");
1632 return (retcode);
1633
1634 /* return the percentage completion of reconstruction */
1635 case RAIDFRAME_CHECK_RECON_STATUS:
1636 if (raidPtr->Layout.map->faultsTolerated == 0) {
1637 /* This makes no sense on a RAID 0, so tell the
1638 user it's done. */
1639 *(int *) data = 100;
1640 return(0);
1641 }
1642 if (raidPtr->status != rf_rs_reconstructing)
1643 *(int *) data = 100;
1644 else {
1645 if (raidPtr->reconControl->numRUsTotal > 0) {
1646 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1647 } else {
1648 *(int *) data = 0;
1649 }
1650 }
1651 return (0);
1652 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1653 rf_check_recon_status_ext(raidPtr, data);
1654 return (0);
1655
1656 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1657 if (raidPtr->Layout.map->faultsTolerated == 0) {
1658 /* This makes no sense on a RAID 0, so tell the
1659 user it's done. */
1660 *(int *) data = 100;
1661 return(0);
1662 }
1663 if (raidPtr->parity_rewrite_in_progress == 1) {
1664 *(int *) data = 100 *
1665 raidPtr->parity_rewrite_stripes_done /
1666 raidPtr->Layout.numStripe;
1667 } else {
1668 *(int *) data = 100;
1669 }
1670 return (0);
1671
1672 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1673 rf_check_parityrewrite_status_ext(raidPtr, data);
1674 return (0);
1675
1676 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1677 if (raidPtr->Layout.map->faultsTolerated == 0) {
1678 /* This makes no sense on a RAID 0 */
1679 *(int *) data = 100;
1680 return(0);
1681 }
1682 if (raidPtr->copyback_in_progress == 1) {
1683 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1684 raidPtr->Layout.numStripe;
1685 } else {
1686 *(int *) data = 100;
1687 }
1688 return (0);
1689
1690 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1691 rf_check_copyback_status_ext(raidPtr, data);
1692 return 0;
1693
1694 case RAIDFRAME_SET_LAST_UNIT:
1695 for (column = 0; column < raidPtr->numCol; column++)
1696 if (raidPtr->Disks[column].status != rf_ds_optimal)
1697 return EBUSY;
1698
1699 for (column = 0; column < raidPtr->numCol; column++) {
1700 clabel = raidget_component_label(raidPtr, column);
1701 clabel->last_unit = *(int *)data;
1702 raidflush_component_label(raidPtr, column);
1703 }
1704 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1705 return 0;
1706
1707 /* the sparetable daemon calls this to wait for the kernel to
1708 * need a spare table. this ioctl does not return until a
1709 * spare table is needed. XXX -- calling mpsleep here in the
1710 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1711 * -- I should either compute the spare table in the kernel,
1712 * or have a different -- XXX XXX -- interface (a different
1713 * character device) for delivering the table -- XXX */
1714 #if 0
1715 case RAIDFRAME_SPARET_WAIT:
1716 rf_lock_mutex2(rf_sparet_wait_mutex);
1717 while (!rf_sparet_wait_queue)
1718 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1719 waitreq = rf_sparet_wait_queue;
1720 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1721 rf_unlock_mutex2(rf_sparet_wait_mutex);
1722
1723 /* structure assignment */
1724 *((RF_SparetWait_t *) data) = *waitreq;
1725
1726 RF_Free(waitreq, sizeof(*waitreq));
1727 return (0);
1728
1729 /* wakes up a process waiting on SPARET_WAIT and puts an error
1730 * code in it that will cause the dameon to exit */
1731 case RAIDFRAME_ABORT_SPARET_WAIT:
1732 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1733 waitreq->fcol = -1;
1734 rf_lock_mutex2(rf_sparet_wait_mutex);
1735 waitreq->next = rf_sparet_wait_queue;
1736 rf_sparet_wait_queue = waitreq;
1737 rf_broadcast_conf2(rf_sparet_wait_cv);
1738 rf_unlock_mutex2(rf_sparet_wait_mutex);
1739 return (0);
1740
1741 /* used by the spare table daemon to deliver a spare table
1742 * into the kernel */
1743 case RAIDFRAME_SEND_SPARET:
1744
1745 /* install the spare table */
1746 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1747
1748 /* respond to the requestor. the return status of the spare
1749 * table installation is passed in the "fcol" field */
1750 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1751 waitreq->fcol = retcode;
1752 rf_lock_mutex2(rf_sparet_wait_mutex);
1753 waitreq->next = rf_sparet_resp_queue;
1754 rf_sparet_resp_queue = waitreq;
1755 rf_broadcast_cond2(rf_sparet_resp_cv);
1756 rf_unlock_mutex2(rf_sparet_wait_mutex);
1757
1758 return (retcode);
1759 #endif
1760
1761 default:
1762 break; /* fall through to the os-specific code below */
1763
1764 }
1765
1766 if (!raidPtr->valid)
1767 return (EINVAL);
1768
1769 /*
1770 * Add support for "regular" device ioctls here.
1771 */
1772
1773 switch (cmd) {
1774 case DIOCGCACHE:
1775 retcode = rf_get_component_caches(raidPtr, (int *)data);
1776 break;
1777
1778 case DIOCCACHESYNC:
1779 retcode = rf_sync_component_caches(raidPtr);
1780 break;
1781
1782 default:
1783 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1784 break;
1785 }
1786
1787 return (retcode);
1788
1789 }
1790
1791
1792 /* raidinit -- complete the rest of the initialization for the
1793 RAIDframe device. */
1794
1795
/*
 * Complete the kernel-facing initialization of a newly configured RAID
 * set: attach the raid pseudo-device, initialize and attach the dk/disk
 * layers, allocate the buffer queue, mark the unit usable, and probe for
 * wedges.  If config_attach_pseudo() fails, the unit is left without
 * RAIDF_INITED set, so later opens/ioctls will reject it.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* attach failed: free the cfdata and leave the unit
		 * unmarked (no RAIDF_INITED) */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* first-come-first-served queueing; RAIDframe does its own
	 * sorting/mapping below this layer */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}
1851
1852 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1853 /* wake up the daemon & tell it to get us a spare table
1854 * XXX
1855 * the entries in the queues should be tagged with the raidPtr
1856 * so that in the extremely rare case that two recons happen at once,
1857 * we know for which device were requesting a spare table
1858 * XXX
1859 *
1860 * XXX This code is not currently used. GO
1861 */
/*
 * Hand a spare-table request to the userland sparetable daemon and block
 * until a response arrives.
 *
 * The caller's `req' is linked onto rf_sparet_wait_queue and the daemon
 * (sleeping in RAIDFRAME_SPARET_WAIT) is woken; we then sleep until a
 * response appears on rf_sparet_resp_queue.  Note that `req' is reused to
 * point at the response structure, which we consume and free here.
 *
 * Returns the daemon's status code (the response's fcol field).
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* pop the daemon's response off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1885 #endif
1886
1887 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1888 * bp & passes it down.
1889 * any calls originating in the kernel must use non-blocking I/O
1890 * do some extra sanity checking to return "appropriate" error values for
1891 * certain conditions (to make some standard utilities work)
1892 *
1893 * Formerly known as: rf_DoAccessKernel
1894 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* The mutex must be dropped across the component-label
		 * update (it can sleep on component I/O) and retaken to
		 * decrement the counter. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* refuse to start I/O on an unconfigured unit */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* kick the dk layer to (re)start issuing queued buffers */
	dk_start(dksc, NULL);
}
1921
1922 static int
1923 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1924 {
1925 RF_SectorCount_t num_blocks, pb, sum;
1926 RF_RaidAddr_t raid_addr;
1927 daddr_t blocknum;
1928 int do_async;
1929 int rc;
1930
1931 rf_lock_mutex2(raidPtr->mutex);
1932 if (raidPtr->openings == 0) {
1933 rf_unlock_mutex2(raidPtr->mutex);
1934 return EAGAIN;
1935 }
1936 rf_unlock_mutex2(raidPtr->mutex);
1937
1938 blocknum = bp->b_rawblkno;
1939
1940 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1941 (int) blocknum));
1942
1943 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1944 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1945
1946 /* *THIS* is where we adjust what block we're going to...
1947 * but DO NOT TOUCH bp->b_blkno!!! */
1948 raid_addr = blocknum;
1949
1950 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1951 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1952 sum = raid_addr + num_blocks + pb;
1953 if (1 || rf_debugKernelAccess) {
1954 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1955 (int) raid_addr, (int) sum, (int) num_blocks,
1956 (int) pb, (int) bp->b_resid));
1957 }
1958 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1959 || (sum < num_blocks) || (sum < pb)) {
1960 rc = ENOSPC;
1961 goto done;
1962 }
1963 /*
1964 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1965 */
1966
1967 if (bp->b_bcount & raidPtr->sectorMask) {
1968 rc = ENOSPC;
1969 goto done;
1970 }
1971 db1_printf(("Calling DoAccess..\n"));
1972
1973
1974 rf_lock_mutex2(raidPtr->mutex);
1975 raidPtr->openings--;
1976 rf_unlock_mutex2(raidPtr->mutex);
1977
1978 /*
1979 * Everything is async.
1980 */
1981 do_async = 1;
1982
1983 /* don't ever condition on bp->b_flags & B_WRITE.
1984 * always condition on B_READ instead */
1985
1986 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1987 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1988 do_async, raid_addr, num_blocks,
1989 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1990
1991 done:
1992 return rc;
1993 }
1994
1995 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1996
/*
 * Dispatch one RAIDframe disk-queue request to the underlying component
 * device.  NOP requests are completed immediately via KernelWakeupFunc();
 * reads and writes are packaged into the request's struct buf and sent
 * down with bdev_strategy().  Called with the queue mutex held; the lock
 * is dropped around bdev_strategy() since that call can block.
 *
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately -- no device I/O needed */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up bp to target the component's vnode/device, with
		 * KernelWakeupFunc as the iodone callback */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2070 /* this is the callback function associated with a I/O invoked from
2071 kernel code.
2072 */
/*
 * biodone callback for component I/O issued by rf_DispatchKernelIO().
 *
 * Records the I/O completion status, marks the component failed on error
 * (but only if the set can tolerate another failure), and hands the
 * finished request to the raidio thread via the iodone queue.  Runs with
 * raidPtr->iodone_lock held for the queue manipulation.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a component-label update
			 * from raidstart() */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2139
2140
2141 /*
2142 * initialize a buf structure for doing an I/O in the kernel.
2143 */
/*
 * initialize a buf structure for doing an I/O in the kernel.
 *
 * Fills in bp for an rw_flag (B_READ/B_WRITE) transfer of numSect sectors
 * at startSect on device `dev', reading/writing at `bf', with `cbFunc'
 * installed as the iodone callback and `cbArg' as its b_private argument.
 * Panics on a zero-length transfer since that indicates a caller bug.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert sector address to DEV_BSIZE units for b_blkno */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2168
2169 /*
2170 * Wait interruptibly for an exclusive lock.
2171 *
2172 * XXX
2173 * Several drivers do this; it should be abstracted and made MP-safe.
2174 * (Hmm... where have we seen this warning before :-> GO )
2175 */
2176 static int
2177 raidlock(struct raid_softc *rs)
2178 {
2179 int error;
2180
2181 error = 0;
2182 mutex_enter(&rs->sc_mutex);
2183 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2184 rs->sc_flags |= RAIDF_WANTED;
2185 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2186 if (error != 0)
2187 goto done;
2188 }
2189 rs->sc_flags |= RAIDF_LOCKED;
2190 done:
2191 mutex_exit(&rs->sc_mutex);
2192 return (error);
2193 }
2194 /*
2195 * Unlock and wake up any waiters.
2196 */
2197 static void
2198 raidunlock(struct raid_softc *rs)
2199 {
2200
2201 mutex_enter(&rs->sc_mutex);
2202 rs->sc_flags &= ~RAIDF_LOCKED;
2203 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2204 rs->sc_flags &= ~RAIDF_WANTED;
2205 cv_broadcast(&rs->sc_cv);
2206 }
2207 mutex_exit(&rs->sc_mutex);
2208 }
2209
2210
2211 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2212 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2213 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2214
/*
 * Byte offset of the component label area on each component.
 * Fixed at RF_COMPONENT_INFO_OFFSET (16 KB) from the start of
 * the component partition.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2221
2222 static daddr_t
2223 rf_component_info_size(unsigned secsize)
2224 {
2225 daddr_t info_size;
2226
2227 KASSERT(secsize);
2228 if (secsize > RF_COMPONENT_INFO_SIZE)
2229 info_size = secsize;
2230 else
2231 info_size = RF_COMPONENT_INFO_SIZE;
2232
2233 return info_size;
2234 }
2235
2236 static daddr_t
2237 rf_parity_map_offset(RF_Raid_t *raidPtr)
2238 {
2239 daddr_t map_offset;
2240
2241 KASSERT(raidPtr->bytesPerSector);
2242 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2243 map_offset = raidPtr->bytesPerSector;
2244 else
2245 map_offset = RF_COMPONENT_INFO_SIZE;
2246 map_offset += rf_component_info_offset();
2247
2248 return map_offset;
2249 }
2250
2251 static daddr_t
2252 rf_parity_map_size(RF_Raid_t *raidPtr)
2253 {
2254 daddr_t map_size;
2255
2256 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2257 map_size = raidPtr->bytesPerSector;
2258 else
2259 map_size = RF_PARITY_MAP_SIZE;
2260
2261 return map_size;
2262 }
2263
2264 int
2265 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2266 {
2267 RF_ComponentLabel_t *clabel;
2268
2269 clabel = raidget_component_label(raidPtr, col);
2270 clabel->clean = RF_RAID_CLEAN;
2271 raidflush_component_label(raidPtr, col);
2272 return(0);
2273 }
2274
2275
2276 int
2277 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2278 {
2279 RF_ComponentLabel_t *clabel;
2280
2281 clabel = raidget_component_label(raidPtr, col);
2282 clabel->clean = RF_RAID_DIRTY;
2283 raidflush_component_label(raidPtr, col);
2284 return(0);
2285 }
2286
/*
 * Read the on-disk component label for column `col' into the
 * in-core copy (raid_cinfo[col].ci_label).  Returns the error
 * from the underlying label read, 0 on success.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2296
/*
 * Return a pointer to the in-core component label for column `col'.
 * The caller may modify it; changes only reach disk via
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2302
2303 int
2304 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2305 {
2306 RF_ComponentLabel_t *label;
2307
2308 label = &raidPtr->raid_cinfo[col].ci_label;
2309 label->mod_counter = raidPtr->mod_counter;
2310 #ifndef RF_NO_PARITY_MAP
2311 label->parity_map_modcount = label->mod_counter;
2312 #endif
2313 return raidwrite_component_label(raidPtr->bytesPerSector,
2314 raidPtr->Disks[col].dev,
2315 raidPtr->raid_cinfo[col].ci_vp, label);
2316 }
2317
2318
/*
 * Read a component label from `dev'/`b_vp' into *clabel.
 * Thin wrapper around raidread_component_area() using the standard
 * label offset/size for this sector size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2328
2329 /* ARGSUSED */
2330 static int
2331 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2332 size_t msize, daddr_t offset, daddr_t dsize)
2333 {
2334 struct buf *bp;
2335 int error;
2336
2337 /* XXX should probably ensure that we don't try to do this if
2338 someone has changed rf_protected_sectors. */
2339
2340 if (b_vp == NULL) {
2341 /* For whatever reason, this component is not valid.
2342 Don't try to read a component label from it. */
2343 return(EINVAL);
2344 }
2345
2346 /* get a block of the appropriate size... */
2347 bp = geteblk((int)dsize);
2348 bp->b_dev = dev;
2349
2350 /* get our ducks in a row for the read */
2351 bp->b_blkno = offset / DEV_BSIZE;
2352 bp->b_bcount = dsize;
2353 bp->b_flags |= B_READ;
2354 bp->b_resid = dsize;
2355
2356 bdev_strategy(bp);
2357 error = biowait(bp);
2358
2359 if (!error) {
2360 memcpy(data, bp->b_data, msize);
2361 }
2362
2363 brelse(bp, 0);
2364 return(error);
2365 }
2366
2367
/*
 * Write *clabel to the component label area on `dev'/`b_vp'.
 * Thin wrapper around raidwrite_component_area() using the standard
 * label offset/size for this sector size; always synchronous
 * (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2377
2378 /* ARGSUSED */
2379 static int
2380 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2381 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2382 {
2383 struct buf *bp;
2384 int error;
2385
2386 /* get a block of the appropriate size... */
2387 bp = geteblk((int)dsize);
2388 bp->b_dev = dev;
2389
2390 /* get our ducks in a row for the write */
2391 bp->b_blkno = offset / DEV_BSIZE;
2392 bp->b_bcount = dsize;
2393 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2394 bp->b_resid = dsize;
2395
2396 memset(bp->b_data, 0, dsize);
2397 memcpy(bp->b_data, data, msize);
2398
2399 bdev_strategy(bp);
2400 if (asyncp)
2401 return 0;
2402 error = biowait(bp);
2403 brelse(bp, 0);
2404 if (error) {
2405 #if 1
2406 printf("Failed to write RAID component info!\n");
2407 #endif
2408 }
2409
2410 return(error);
2411 }
2412
2413 void
2414 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2415 {
2416 int c;
2417
2418 for (c = 0; c < raidPtr->numCol; c++) {
2419 /* Skip dead disks. */
2420 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2421 continue;
2422 /* XXXjld: what if an error occurs here? */
2423 raidwrite_component_area(raidPtr->Disks[c].dev,
2424 raidPtr->raid_cinfo[c].ci_vp, map,
2425 RF_PARITYMAP_NBYTE,
2426 rf_parity_map_offset(raidPtr),
2427 rf_parity_map_size(raidPtr), 0);
2428 }
2429 }
2430
2431 void
2432 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2433 {
2434 struct rf_paritymap_ondisk tmp;
2435 int c,first;
2436
2437 first=1;
2438 for (c = 0; c < raidPtr->numCol; c++) {
2439 /* Skip dead disks. */
2440 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2441 continue;
2442 raidread_component_area(raidPtr->Disks[c].dev,
2443 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2444 RF_PARITYMAP_NBYTE,
2445 rf_parity_map_offset(raidPtr),
2446 rf_parity_map_size(raidPtr));
2447 if (first) {
2448 memcpy(map, &tmp, sizeof(*map));
2449 first = 0;
2450 } else {
2451 rf_paritymap_merge(map, &tmp);
2452 }
2453 }
2454 }
2455
/*
 * Bump the set's mod_counter and mark every usable component (and
 * every in-use spare) dirty on disk.  Called when the set goes
 * "in use" so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Second pass: any spares that are actively standing in for a
	   failed component get a freshly initialized, dirty label too. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced.
			   NOTE(review): if no column points at this spare,
			   scol keeps its previous value (-1 on the first
			   iteration) and that is what lands in
			   clabel->column — presumably "can't happen" for
			   an rf_ds_used_spare; verify. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2515
2516
/*
 * Refresh the on-disk component labels for all optimal components
 * and all in-use spares: bump mod_counter, record status and (unless
 * the unit number changed) the unit we are configured as, and flush.
 * When `final' is RF_FINAL_COMPONENT_UPDATE and parity is known good,
 * also set the clean bit (shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced.
			   NOTE(review): if no column matches, scol stays -1
			   and is written to clabel->column — presumably
			   unreachable for rf_ds_used_spare; verify. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2594
2595 void
2596 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2597 {
2598
2599 if (vp != NULL) {
2600 if (auto_configured == 1) {
2601 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2602 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2603 vput(vp);
2604
2605 } else {
2606 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2607 }
2608 }
2609 }
2610
2611
2612 void
2613 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2614 {
2615 int r,c;
2616 struct vnode *vp;
2617 int acd;
2618
2619
2620 /* We take this opportunity to close the vnodes like we should.. */
2621
2622 for (c = 0; c < raidPtr->numCol; c++) {
2623 vp = raidPtr->raid_cinfo[c].ci_vp;
2624 acd = raidPtr->Disks[c].auto_configured;
2625 rf_close_component(raidPtr, vp, acd);
2626 raidPtr->raid_cinfo[c].ci_vp = NULL;
2627 raidPtr->Disks[c].auto_configured = 0;
2628 }
2629
2630 for (r = 0; r < raidPtr->numSpare; r++) {
2631 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2632 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2633 rf_close_component(raidPtr, vp, acd);
2634 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2635 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2636 }
2637 }
2638
2639
2640 void
2641 rf_ReconThread(struct rf_recon_req_internal *req)
2642 {
2643 int s;
2644 RF_Raid_t *raidPtr;
2645
2646 s = splbio();
2647 raidPtr = (RF_Raid_t *) req->raidPtr;
2648 raidPtr->recon_in_progress = 1;
2649
2650 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2651 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2652
2653 RF_Free(req, sizeof(*req));
2654
2655 raidPtr->recon_in_progress = 0;
2656 splx(s);
2657
2658 /* That's all... */
2659 kthread_exit(0); /* does not return */
2660 }
2661
2662 void
2663 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2664 {
2665 int retcode;
2666 int s;
2667
2668 raidPtr->parity_rewrite_stripes_done = 0;
2669 raidPtr->parity_rewrite_in_progress = 1;
2670 s = splbio();
2671 retcode = rf_RewriteParity(raidPtr);
2672 splx(s);
2673 if (retcode) {
2674 printf("raid%d: Error re-writing parity (%d)!\n",
2675 raidPtr->raidid, retcode);
2676 } else {
2677 /* set the clean bit! If we shutdown correctly,
2678 the clean bit on each component label will get
2679 set */
2680 raidPtr->parity_good = RF_RAID_CLEAN;
2681 }
2682 raidPtr->parity_rewrite_in_progress = 0;
2683
2684 /* Anyone waiting for us to stop? If so, inform them... */
2685 if (raidPtr->waitShutdown) {
2686 rf_lock_mutex2(raidPtr->rad_lock);
2687 cv_broadcast(&raidPtr->parity_rewrite_cv);
2688 rf_unlock_mutex2(raidPtr->rad_lock);
2689 }
2690
2691 /* That's all... */
2692 kthread_exit(0); /* does not return */
2693 }
2694
2695
2696 void
2697 rf_CopybackThread(RF_Raid_t *raidPtr)
2698 {
2699 int s;
2700
2701 raidPtr->copyback_in_progress = 1;
2702 s = splbio();
2703 rf_CopybackReconstructedData(raidPtr);
2704 splx(s);
2705 raidPtr->copyback_in_progress = 0;
2706
2707 /* That's all... */
2708 kthread_exit(0); /* does not return */
2709 }
2710
2711
2712 void
2713 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2714 {
2715 int s;
2716 RF_Raid_t *raidPtr;
2717
2718 s = splbio();
2719 raidPtr = req->raidPtr;
2720 raidPtr->recon_in_progress = 1;
2721 rf_ReconstructInPlace(raidPtr, req->col);
2722 RF_Free(req, sizeof(*req));
2723 raidPtr->recon_in_progress = 0;
2724 splx(s);
2725
2726 /* That's all... */
2727 kthread_exit(0); /* does not return */
2728 }
2729
/*
 * Probe one candidate device/partition for a RAIDframe component
 * label.  If a reasonable label is found, prepend a new
 * RF_AutoConfig_t to `ac_list' (taking ownership of `vp') and return
 * the new list head.  Otherwise the vnode is closed and released and
 * the list is returned unchanged.  On allocation failure the whole
 * list is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: free the entire accumulated list. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no component here, so release the label
		   memory and close/drop the vnode we were handed. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2787
/*
 * Scan every disk-class device in the system for RAIDframe
 * components and return a list of all candidates found (NULL if
 * none).  The scan runs twice: wedges (dk) first, then everything
 * else, so that a wedge covering a whole disk wins over the raw
 * partition of the same disk.  For non-wedge disks each disklabel
 * partition marked FS_RAID is probed; if none are marked, the raw
 * partition is probed as a fallback.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: probe the wedge only if its
				   partition type says RAIDframe. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes ownership of vp */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2991
2992
2993 int
2994 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2995 {
2996
2997 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2998 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2999 ((clabel->clean == RF_RAID_CLEAN) ||
3000 (clabel->clean == RF_RAID_DIRTY)) &&
3001 clabel->row >=0 &&
3002 clabel->column >= 0 &&
3003 clabel->num_rows > 0 &&
3004 clabel->num_columns > 0 &&
3005 clabel->row < clabel->num_rows &&
3006 clabel->column < clabel->num_columns &&
3007 clabel->blockSize > 0 &&
3008 /*
3009 * numBlocksHi may contain garbage, but it is ok since
3010 * the type is unsigned. If it is really garbage,
3011 * rf_fix_old_label_size() will fix it.
3012 */
3013 rf_component_label_numblocks(clabel) > 0) {
3014 /*
3015 * label looks reasonable enough...
3016 * let's make sure it has no old garbage.
3017 */
3018 if (numsecs)
3019 rf_fix_old_label_size(clabel, numsecs);
3020 return(1);
3021 }
3022 return(0);
3023 }
3024
3025
3026 /*
3027 * For reasons yet unknown, some old component labels have garbage in
3028 * the newer numBlocksHi region, and this causes lossage. Since those
3029 * disks will also have numsecs set to less than 32 bits of sectors,
3030 * we can determine when this corruption has occurred, and fix it.
3031 *
3032 * The exact same problem, with the same unknown reason, happens to
3033 * the partitionSizeHi member as well.
3034 */
3035 static void
3036 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3037 {
3038
3039 if (numsecs < ((uint64_t)1 << 32)) {
3040 if (clabel->numBlocksHi) {
3041 printf("WARNING: total sectors < 32 bits, yet "
3042 "numBlocksHi set\n"
3043 "WARNING: resetting numBlocksHi to zero.\n");
3044 clabel->numBlocksHi = 0;
3045 }
3046
3047 if (clabel->partitionSizeHi) {
3048 printf("WARNING: total sectors < 32 bits, yet "
3049 "partitionSizeHi set\n"
3050 "WARNING: resetting partitionSizeHi to zero.\n");
3051 clabel->partitionSizeHi = 0;
3052 }
3053 }
3054 }
3055
3056
#ifdef DEBUG
/*
 * Dump a component label to the console in human-readable form.
 * DEBUG kernels only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Index by (root_partition & 3); index 3 covers invalid values. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3090
3091 RF_ConfigSet_t *
3092 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3093 {
3094 RF_AutoConfig_t *ac;
3095 RF_ConfigSet_t *config_sets;
3096 RF_ConfigSet_t *cset;
3097 RF_AutoConfig_t *ac_next;
3098
3099
3100 config_sets = NULL;
3101
3102 /* Go through the AutoConfig list, and figure out which components
3103 belong to what sets. */
3104 ac = ac_list;
3105 while(ac!=NULL) {
3106 /* we're going to putz with ac->next, so save it here
3107 for use at the end of the loop */
3108 ac_next = ac->next;
3109
3110 if (config_sets == NULL) {
3111 /* will need at least this one... */
3112 config_sets = (RF_ConfigSet_t *)
3113 malloc(sizeof(RF_ConfigSet_t),
3114 M_RAIDFRAME, M_NOWAIT);
3115 if (config_sets == NULL) {
3116 panic("rf_create_auto_sets: No memory!");
3117 }
3118 /* this one is easy :) */
3119 config_sets->ac = ac;
3120 config_sets->next = NULL;
3121 config_sets->rootable = 0;
3122 ac->next = NULL;
3123 } else {
3124 /* which set does this component fit into? */
3125 cset = config_sets;
3126 while(cset!=NULL) {
3127 if (rf_does_it_fit(cset, ac)) {
3128 /* looks like it matches... */
3129 ac->next = cset->ac;
3130 cset->ac = ac;
3131 break;
3132 }
3133 cset = cset->next;
3134 }
3135 if (cset==NULL) {
3136 /* didn't find a match above... new set..*/
3137 cset = (RF_ConfigSet_t *)
3138 malloc(sizeof(RF_ConfigSet_t),
3139 M_RAIDFRAME, M_NOWAIT);
3140 if (cset == NULL) {
3141 panic("rf_create_auto_sets: No memory!");
3142 }
3143 cset->ac = ac;
3144 ac->next = NULL;
3145 cset->next = config_sets;
3146 cset->rootable = 0;
3147 config_sets = cset;
3148 }
3149 }
3150 ac = ac_next;
3151 }
3152
3153
3154 return(config_sets);
3155 }
3156
3157 static int
3158 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3159 {
3160 RF_ComponentLabel_t *clabel1, *clabel2;
3161
3162 /* If this one matches the *first* one in the set, that's good
3163 enough, since the other members of the set would have been
3164 through here too... */
3165 /* note that we are not checking partitionSize here..
3166
3167 Note that we are also not checking the mod_counters here.
3168 If everything else matches except the mod_counter, that's
3169 good enough for this test. We will deal with the mod_counters
3170 a little later in the autoconfiguration process.
3171
3172 (clabel1->mod_counter == clabel2->mod_counter) &&
3173
3174 The reason we don't check for this is that failed disks
3175 will have lower modification counts. If those disks are
3176 not added to the set they used to belong to, then they will
3177 form their own set, which may result in 2 different sets,
3178 for example, competing to be configured at raid0, and
3179 perhaps competing to be the root filesystem set. If the
3180 wrong ones get configured, or both attempt to become /,
3181 weird behaviour and or serious lossage will occur. Thus we
3182 need to bring them into the fold here, and kick them out at
3183 a later point.
3184
3185 */
3186
3187 clabel1 = cset->ac->clabel;
3188 clabel2 = ac->clabel;
3189 if ((clabel1->version == clabel2->version) &&
3190 (clabel1->serial_number == clabel2->serial_number) &&
3191 (clabel1->num_rows == clabel2->num_rows) &&
3192 (clabel1->num_columns == clabel2->num_columns) &&
3193 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3194 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3195 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3196 (clabel1->parityConfig == clabel2->parityConfig) &&
3197 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3198 (clabel1->blockSize == clabel2->blockSize) &&
3199 rf_component_label_numblocks(clabel1) ==
3200 rf_component_label_numblocks(clabel2) &&
3201 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3202 (clabel1->root_partition == clabel2->root_partition) &&
3203 (clabel1->last_unit == clabel2->last_unit) &&
3204 (clabel1->config_order == clabel2->config_order)) {
3205 /* if it get's here, it almost *has* to be a match */
3206 } else {
3207 /* it's not consistent with somebody in the set..
3208 punt */
3209 return(0);
3210 }
3211 /* all was fine.. it must fit... */
3212 return(1);
3213 }
3214
/*
 * Decide whether a config set has enough live, current components to
 * be configured.  A component counts as live only if its mod_counter
 * equals the set's maximum mod_counter (stale components are treated
 * as missing).  RAID 1 is special-cased: components are paired
 * (even, odd), and the set survives unless both halves of some pair
 * are missing.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest value seen wins; lower values indicate components
	   that missed recent updates (e.g. were failed). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   claiming column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Non-mirror levels: RAID 0 tolerates no missing components,
	   RAID 4/5 tolerate at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3317
3318 void
3319 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3320 RF_Raid_t *raidPtr)
3321 {
3322 RF_ComponentLabel_t *clabel;
3323 int i;
3324
3325 clabel = ac->clabel;
3326
3327 /* 1. Fill in the common stuff */
3328 config->numCol = clabel->num_columns;
3329 config->numSpare = 0; /* XXX should this be set here? */
3330 config->sectPerSU = clabel->sectPerSU;
3331 config->SUsPerPU = clabel->SUsPerPU;
3332 config->SUsPerRU = clabel->SUsPerRU;
3333 config->parityConfig = clabel->parityConfig;
3334 /* XXX... */
3335 strcpy(config->diskQueueType,"fifo");
3336 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3337 config->layoutSpecificSize = 0; /* XXX ?? */
3338
3339 while(ac!=NULL) {
3340 /* row/col values will be in range due to the checks
3341 in reasonable_label() */
3342 strcpy(config->devnames[0][ac->clabel->column],
3343 ac->devname);
3344 ac = ac->next;
3345 }
3346
3347 for(i=0;i<RF_MAXDBGV;i++) {
3348 config->debugVars[i][0] = 0;
3349 }
3350 }
3351
3352 int
3353 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3354 {
3355 RF_ComponentLabel_t *clabel;
3356 int column;
3357 int sparecol;
3358
3359 raidPtr->autoconfigure = new_value;
3360
3361 for(column=0; column<raidPtr->numCol; column++) {
3362 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3363 clabel = raidget_component_label(raidPtr, column);
3364 clabel->autoconfigure = new_value;
3365 raidflush_component_label(raidPtr, column);
3366 }
3367 }
3368 for(column = 0; column < raidPtr->numSpare ; column++) {
3369 sparecol = raidPtr->numCol + column;
3370 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3371 clabel = raidget_component_label(raidPtr, sparecol);
3372 clabel->autoconfigure = new_value;
3373 raidflush_component_label(raidPtr, sparecol);
3374 }
3375 }
3376 return(new_value);
3377 }
3378
3379 int
3380 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3381 {
3382 RF_ComponentLabel_t *clabel;
3383 int column;
3384 int sparecol;
3385
3386 raidPtr->root_partition = new_value;
3387 for(column=0; column<raidPtr->numCol; column++) {
3388 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3389 clabel = raidget_component_label(raidPtr, column);
3390 clabel->root_partition = new_value;
3391 raidflush_component_label(raidPtr, column);
3392 }
3393 }
3394 for(column = 0; column < raidPtr->numSpare ; column++) {
3395 sparecol = raidPtr->numCol + column;
3396 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3397 clabel = raidget_component_label(raidPtr, sparecol);
3398 clabel->root_partition = new_value;
3399 raidflush_component_label(raidPtr, sparecol);
3400 }
3401 }
3402 return(new_value);
3403 }
3404
3405 void
3406 rf_release_all_vps(RF_ConfigSet_t *cset)
3407 {
3408 RF_AutoConfig_t *ac;
3409
3410 ac = cset->ac;
3411 while(ac!=NULL) {
3412 /* Close the vp, and give it back */
3413 if (ac->vp) {
3414 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3415 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3416 vput(ac->vp);
3417 ac->vp = NULL;
3418 }
3419 ac = ac->next;
3420 }
3421 }
3422
3423
3424 void
3425 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3426 {
3427 RF_AutoConfig_t *ac;
3428 RF_AutoConfig_t *next_ac;
3429
3430 ac = cset->ac;
3431 while(ac!=NULL) {
3432 next_ac = ac->next;
3433 /* nuke the label */
3434 free(ac->clabel, M_RAIDFRAME);
3435 /* cleanup the config structure */
3436 free(ac, M_RAIDFRAME);
3437 /* "next.." */
3438 ac = next_ac;
3439 }
3440 /* and, finally, nuke the config set */
3441 free(cset, M_RAIDFRAME);
3442 }
3443
3444
/*
 * Initialize *clabel from the live state of raidPtr so it can be
 * written out as the on-disk component label for this set.  Per-column
 * fields (column number, per-component block counts if they differ)
 * are NOT set here; callers fill those in separately.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Geometry: this implementation always uses a single row. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* Layout parameters, copied from the in-core layout. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	/* Remember which unit this was, for autoconfig next boot. */
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3477
/*
 * Configure a RAID set described by an autoconfiguration config set.
 * Returns the configured softc on success, or NULL if memory
 * allocation or rf_Configure() fails.  The caller retains ownership
 * of cset.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the component label and scan
	   upward past any units that are already configured. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc yet at raidID; the second raidget() call with
	   "true" presumably creates one -- confirm against raidget(). */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; release the unit we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3561
3562 void
3563 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3564 size_t xmin, size_t xmax)
3565 {
3566 int error;
3567
3568 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3569 pool_sethiwat(p, xmax);
3570 if ((error = pool_prime(p, xmin)) != 0)
3571 panic("%s: failed to prime pool: %d", __func__, error);
3572 pool_setlowat(p, xmin);
3573 }
3574
3575 /*
3576 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3577 * to see if there is IO pending and if that IO could possibly be done
3578 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3579 * otherwise.
3580 *
3581 */
3582 int
3583 rf_buf_queue_check(RF_Raid_t *raidPtr)
3584 {
3585 struct raid_softc *rs;
3586 struct dk_softc *dksc;
3587
3588 rs = raidPtr->softc;
3589 dksc = &rs->sc_dksc;
3590
3591 if ((rs->sc_flags & RAIDF_INITED) == 0)
3592 return 1;
3593
3594 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3595 /* there is work to do */
3596 return 0;
3597 }
3598 /* default is nothing to do */
3599 return 1;
3600 }
3601
3602 int
3603 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3604 {
3605 uint64_t numsecs;
3606 unsigned secsize;
3607 int error;
3608
3609 error = getdisksize(vp, &numsecs, &secsize);
3610 if (error == 0) {
3611 diskPtr->blockSize = secsize;
3612 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3613 diskPtr->partitionSize = numsecs;
3614 return 0;
3615 }
3616 return error;
3617 }
3618
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3624
/*
 * Autoconf attach function: intentionally empty; device setup is
 * done elsewhere in this driver.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3629
3630
3631 static int
3632 raid_detach(device_t self, int flags)
3633 {
3634 int error;
3635 struct raid_softc *rs = raidsoftc(self);
3636
3637 if (rs == NULL)
3638 return ENXIO;
3639
3640 if ((error = raidlock(rs)) != 0)
3641 return (error);
3642
3643 error = raid_detach_unlocked(rs);
3644
3645 raidunlock(rs);
3646
3647 /* XXX raid can be referenced here */
3648
3649 if (error)
3650 return error;
3651
3652 /* Free the softc */
3653 raidput(rs);
3654
3655 return 0;
3656 }
3657
/*
 * Fill in a synthetic disk geometry for the RAID set from its layout
 * and push it to the disk(9) layer via disk_set_info().
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* 4 tracks per column: a synthetic value, not real geometry. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3673
3674 /*
3675 * Get cache info for all the components (including spares).
3676 * Returns intersection of all the cache flags of all disks, or first
3677 * error if any encountered.
3678 * XXXfua feature flags can change as spares are added - lock down somehow
3679 */
3680 static int
3681 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3682 {
3683 int c;
3684 int error;
3685 int dkwhole = 0, dkpart;
3686
3687 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3688 /*
3689 * Check any non-dead disk, even when currently being
3690 * reconstructed.
3691 */
3692 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3693 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3694 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3695 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3696 if (error) {
3697 if (error != ENODEV) {
3698 printf("raid%d: get cache for component %s failed\n",
3699 raidPtr->raidid,
3700 raidPtr->Disks[c].devname);
3701 }
3702
3703 return error;
3704 }
3705
3706 if (c == 0)
3707 dkwhole = dkpart;
3708 else
3709 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3710 }
3711 }
3712
3713 *data = dkwhole;
3714
3715 return 0;
3716 }
3717
3718 /*
3719 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3720 * We end up returning whatever error was returned by the first cache flush
3721 * that fails.
3722 */
3723
3724 int
3725 rf_sync_component_caches(RF_Raid_t *raidPtr)
3726 {
3727 int c, sparecol;
3728 int e,error;
3729 int force = 1;
3730
3731 error = 0;
3732 for (c = 0; c < raidPtr->numCol; c++) {
3733 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3734 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3735 &force, FWRITE, NOCRED);
3736 if (e) {
3737 if (e != ENODEV)
3738 printf("raid%d: cache flush to component %s failed.\n",
3739 raidPtr->raidid, raidPtr->Disks[c].devname);
3740 if (error == 0) {
3741 error = e;
3742 }
3743 }
3744 }
3745 }
3746
3747 for( c = 0; c < raidPtr->numSpare ; c++) {
3748 sparecol = raidPtr->numCol + c;
3749 /* Need to ensure that the reconstruct actually completed! */
3750 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3751 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3752 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3753 if (e) {
3754 if (e != ENODEV)
3755 printf("raid%d: cache flush to component %s failed.\n",
3756 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3757 if (error == 0) {
3758 error = e;
3759 }
3760 }
3761 }
3762 }
3763 return error;
3764 }
3765
3766 /* Fill in info with the current status */
3767 void
3768 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3769 {
3770
3771 if (raidPtr->status != rf_rs_reconstructing) {
3772 info->total = 100;
3773 info->completed = 100;
3774 } else {
3775 info->total = raidPtr->reconControl->numRUsTotal;
3776 info->completed = raidPtr->reconControl->numRUsComplete;
3777 }
3778 info->remaining = info->total - info->completed;
3779 }
3780
3781 /* Fill in info with the current status */
3782 void
3783 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3784 {
3785
3786 if (raidPtr->parity_rewrite_in_progress == 1) {
3787 info->total = raidPtr->Layout.numStripe;
3788 info->completed = raidPtr->parity_rewrite_stripes_done;
3789 } else {
3790 info->completed = 100;
3791 info->total = 100;
3792 }
3793 info->remaining = info->total - info->completed;
3794 }
3795
3796 /* Fill in info with the current status */
3797 void
3798 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3799 {
3800
3801 if (raidPtr->copyback_in_progress == 1) {
3802 info->total = raidPtr->Layout.numStripe;
3803 info->completed = raidPtr->copyback_stripes_done;
3804 info->remaining = info->total - info->completed;
3805 } else {
3806 info->remaining = 0;
3807 info->completed = 100;
3808 info->total = 100;
3809 }
3810 }
3811
3812 /* Fill in config with the current info */
3813 int
3814 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3815 {
3816 int d, i, j;
3817
3818 if (!raidPtr->valid)
3819 return (ENODEV);
3820 config->cols = raidPtr->numCol;
3821 config->ndevs = raidPtr->numCol;
3822 if (config->ndevs >= RF_MAX_DISKS)
3823 return (ENOMEM);
3824 config->nspares = raidPtr->numSpare;
3825 if (config->nspares >= RF_MAX_DISKS)
3826 return (ENOMEM);
3827 config->maxqdepth = raidPtr->maxQueueDepth;
3828 d = 0;
3829 for (j = 0; j < config->cols; j++) {
3830 config->devs[d] = raidPtr->Disks[j];
3831 d++;
3832 }
3833 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3834 config->spares[i] = raidPtr->Disks[j];
3835 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3836 /* XXX: raidctl(8) expects to see this as a used spare */
3837 config->spares[i].status = rf_ds_used_spare;
3838 }
3839 }
3840 return 0;
3841 }
3842
3843 int
3844 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3845 {
3846 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3847 RF_ComponentLabel_t *raid_clabel;
3848 int column = clabel->column;
3849
3850 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3851 return EINVAL;
3852 raid_clabel = raidget_component_label(raidPtr, column);
3853 memcpy(clabel, raid_clabel, sizeof *clabel);
3854
3855 return 0;
3856 }
3857
3858 /*
3859 * Module interface
3860 */
3861
/* The raid module depends on the dk_subr and bufq_fcfs modules. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* When built as a loadable module we supply our own cfdriver. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3871
3872 static int
3873 raid_modcmd(modcmd_t cmd, void *data)
3874 {
3875 int error;
3876
3877 error = 0;
3878 switch (cmd) {
3879 case MODULE_CMD_INIT:
3880 error = raid_modcmd_init();
3881 break;
3882 case MODULE_CMD_FINI:
3883 error = raid_modcmd_fini();
3884 break;
3885 default:
3886 error = ENOTTY;
3887 break;
3888 }
3889 return error;
3890 }
3891
/*
 * Module initialization: attach the device switch, cfdriver and
 * cfattach, boot RAIDframe, and register a config finalizer for RAID
 * autoconfiguration.  Each failure path rolls back the steps already
 * taken before returning.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets the system choose the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be present. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attachment. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back the cfdriver and devsw attachments. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 here; every failure path above returned early. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: autoconfiguration simply won't run. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3962
/*
 * Module finalization: refuse to unload while any raid unit exists,
 * then detach the cfattach, cfdriver and device switch (re-attaching
 * the earlier pieces if a later detach fails), shut down RAIDframe,
 * and destroy the global lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Undo the cfattach detach so state stays consistent. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Undo the cfdriver and cfattach detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4012