rf_netbsdkintf.c revision 1.364 1 /* $NetBSD: rf_netbsdkintf.c,v 1.364 2019/02/05 09:28:00 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.364 2019/02/05 09:28:00 mrg Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "rf_compat80.h"
153
154 #ifdef COMPAT_NETBSD32
155 #ifdef _LP64
156 #include "rf_compat32.h"
#define RAID_COMPAT32
159 #endif
160 #endif
161
162 #include "ioconf.h"
163
164 #ifdef DEBUG
165 int rf_kdebug_level = 0;
166 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
167 #else /* DEBUG */
168 #define db1_printf(a) { }
169 #endif /* DEBUG */
170
171 #ifdef DEBUG_ROOT
172 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
173 #else
174 #define DPRINTF(a, ...)
175 #endif
176
177 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
178 static rf_declare_mutex2(rf_sparet_wait_mutex);
179 static rf_declare_cond2(rf_sparet_wait_cv);
180 static rf_declare_cond2(rf_sparet_resp_cv);
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186 #endif
187
188 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
189
190 /* prototypes */
191 static void KernelWakeupFunc(struct buf *);
192 static void InitBP(struct buf *, struct vnode *, unsigned,
193 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
194 void *, int, struct proc *);
195 struct raid_softc;
196 static void raidinit(struct raid_softc *);
197 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
198 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
199
200 static int raid_match(device_t, cfdata_t, void *);
201 static void raid_attach(device_t, device_t, void *);
202 static int raid_detach(device_t, int);
203
204 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
205 daddr_t, daddr_t);
206 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
207 daddr_t, daddr_t, int);
208
209 static int raidwrite_component_label(unsigned,
210 dev_t, struct vnode *, RF_ComponentLabel_t *);
211 static int raidread_component_label(unsigned,
212 dev_t, struct vnode *, RF_ComponentLabel_t *);
213
214 static int raid_diskstart(device_t, struct buf *bp);
215 static int raid_dumpblocks(device_t, void *, daddr_t, int);
216 static int raid_lastclose(device_t);
217
218 static dev_type_open(raidopen);
219 static dev_type_close(raidclose);
220 static dev_type_read(raidread);
221 static dev_type_write(raidwrite);
222 static dev_type_ioctl(raidioctl);
223 static dev_type_strategy(raidstrategy);
224 static dev_type_dump(raiddump);
225 static dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 .d_open = raidopen,
229 .d_close = raidclose,
230 .d_strategy = raidstrategy,
231 .d_ioctl = raidioctl,
232 .d_dump = raiddump,
233 .d_psize = raidsize,
234 .d_discard = nodiscard,
235 .d_flag = D_DISK
236 };
237
238 const struct cdevsw raid_cdevsw = {
239 .d_open = raidopen,
240 .d_close = raidclose,
241 .d_read = raidread,
242 .d_write = raidwrite,
243 .d_ioctl = raidioctl,
244 .d_stop = nostop,
245 .d_tty = notty,
246 .d_poll = nopoll,
247 .d_mmap = nommap,
248 .d_kqfilter = nokqfilter,
249 .d_discard = nodiscard,
250 .d_flag = D_DISK
251 };
252
253 static struct dkdriver rf_dkdriver = {
254 .d_open = raidopen,
255 .d_close = raidclose,
256 .d_strategy = raidstrategy,
257 .d_diskstart = raid_diskstart,
258 .d_dumpblocks = raid_dumpblocks,
259 .d_lastclose = raid_lastclose,
260 .d_minphys = minphys
261 };
262
263 struct raid_softc {
264 struct dk_softc sc_dksc;
265 int sc_unit;
266 int sc_flags; /* flags */
267 int sc_cflags; /* configuration flags */
268 kmutex_t sc_mutex; /* interlock mutex */
269 kcondvar_t sc_cv; /* and the condvar */
270 uint64_t sc_size; /* size of the raid device */
271 char sc_xname[20]; /* XXX external name */
272 RF_Raid_t sc_r;
273 LIST_ENTRY(raid_softc) sc_link;
274 };
275 /* sc_flags */
276 #define RAIDF_INITED 0x01 /* unit has been initialized */
277 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
278 #define RAIDF_DETACH 0x04 /* detach after final close */
279 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
280 #define RAIDF_LOCKED 0x10 /* unit is locked */
281 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
282
283 #define raidunit(x) DISKUNIT(x)
284 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
285
286 extern struct cfdriver raid_cd;
287 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
288 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
289 DVF_DETACH_SHUTDOWN);
290
291 /* Internal representation of a rf_recon_req */
292 struct rf_recon_req_internal {
293 RF_RowCol_t col;
294 RF_ReconReqFlags_t flags;
295 void *raidPtr;
296 };
297
298 /*
299 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
300 * Be aware that large numbers can allow the driver to consume a lot of
301 * kernel memory, especially on writes, and in degraded mode reads.
302 *
303 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
304 * a single 64K write will typically require 64K for the old data,
305 * 64K for the old parity, and 64K for the new parity, for a total
306 * of 192K (if the parity buffer is not re-used immediately).
307 * Even it if is used immediately, that's still 128K, which when multiplied
308 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
309 *
310 * Now in degraded mode, for example, a 64K read on the above setup may
311 * require data reconstruction, which will require *all* of the 4 remaining
312 * disks to participate -- 4 * 32K/disk == 128K again.
313 */
314
315 #ifndef RAIDOUTSTANDING
316 #define RAIDOUTSTANDING 6
317 #endif
318
319 #define RAIDLABELDEV(dev) \
320 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
321
322 /* declared here, and made public, for the benefit of KVM stuff.. */
323
324 static int raidlock(struct raid_softc *);
325 static void raidunlock(struct raid_softc *);
326
327 static int raid_detach_unlocked(struct raid_softc *);
328
329 static void rf_markalldirty(RF_Raid_t *);
330 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
331
332 void rf_ReconThread(struct rf_recon_req_internal *);
333 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
334 void rf_CopybackThread(RF_Raid_t *raidPtr);
335 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
336 int rf_autoconfig(device_t);
337 void rf_buildroothack(RF_ConfigSet_t *);
338
339 RF_AutoConfig_t *rf_find_raid_components(void);
340 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
341 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
342 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
343 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
344 int rf_set_autoconfig(RF_Raid_t *, int);
345 int rf_set_rootpartition(RF_Raid_t *, int);
346 void rf_release_all_vps(RF_ConfigSet_t *);
347 void rf_cleanup_config_set(RF_ConfigSet_t *);
348 int rf_have_enough_components(RF_ConfigSet_t *);
349 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
350 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
351
352 /*
353 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
354 * Note that this is overridden by having RAID_AUTOCONFIG as an option
355 * in the kernel config file.
356 */
357 #ifdef RAID_AUTOCONFIG
358 int raidautoconfig = 1;
359 #else
360 int raidautoconfig = 0;
361 #endif
362 static bool raidautoconfigdone = false;
363
364 struct RF_Pools_s rf_pools;
365
366 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
367 static kmutex_t raid_lock;
368
369 static struct raid_softc *
370 raidcreate(int unit) {
371 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
372 sc->sc_unit = unit;
373 cv_init(&sc->sc_cv, "raidunit");
374 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
375 return sc;
376 }
377
378 static void
379 raiddestroy(struct raid_softc *sc) {
380 cv_destroy(&sc->sc_cv);
381 mutex_destroy(&sc->sc_mutex);
382 kmem_free(sc, sizeof(*sc));
383 }
384
385 static struct raid_softc *
386 raidget(int unit, bool create) {
387 struct raid_softc *sc;
388 if (unit < 0) {
389 #ifdef DIAGNOSTIC
390 panic("%s: unit %d!", __func__, unit);
391 #endif
392 return NULL;
393 }
394 mutex_enter(&raid_lock);
395 LIST_FOREACH(sc, &raids, sc_link) {
396 if (sc->sc_unit == unit) {
397 mutex_exit(&raid_lock);
398 return sc;
399 }
400 }
401 mutex_exit(&raid_lock);
402 if (!create)
403 return NULL;
404 if ((sc = raidcreate(unit)) == NULL)
405 return NULL;
406 mutex_enter(&raid_lock);
407 LIST_INSERT_HEAD(&raids, sc, sc_link);
408 mutex_exit(&raid_lock);
409 return sc;
410 }
411
412 static void
413 raidput(struct raid_softc *sc) {
414 mutex_enter(&raid_lock);
415 LIST_REMOVE(sc, sc_link);
416 mutex_exit(&raid_lock);
417 raiddestroy(sc);
418 }
419
/*
 * Legacy pseudo-device attach hook.  Intentionally a no-op: device
 * attachment and the associated initialization are performed from the
 * module initialization code instead.
 */
void
raidattach(int num)
{
}
429
430 int
431 rf_autoconfig(device_t self)
432 {
433 RF_AutoConfig_t *ac_list;
434 RF_ConfigSet_t *config_sets;
435
436 if (!raidautoconfig || raidautoconfigdone == true)
437 return (0);
438
439 /* XXX This code can only be run once. */
440 raidautoconfigdone = true;
441
442 #ifdef __HAVE_CPU_BOOTCONF
443 /*
444 * 0. find the boot device if needed first so we can use it later
445 * this needs to be done before we autoconfigure any raid sets,
446 * because if we use wedges we are not going to be able to open
447 * the boot device later
448 */
449 if (booted_device == NULL)
450 cpu_bootconf();
451 #endif
452 /* 1. locate all RAID components on the system */
453 aprint_debug("Searching for RAID components...\n");
454 ac_list = rf_find_raid_components();
455
456 /* 2. Sort them into their respective sets. */
457 config_sets = rf_create_auto_sets(ac_list);
458
459 /*
460 * 3. Evaluate each set and configure the valid ones.
461 * This gets done in rf_buildroothack().
462 */
463 rf_buildroothack(config_sets);
464
465 return 1;
466 }
467
468 static int
469 rf_containsboot(RF_Raid_t *r, device_t bdv) {
470 const char *bootname;
471 size_t len;
472
473 /* if bdv is NULL, the set can't contain it. exit early. */
474 if (bdv == NULL)
475 return 0;
476
477 bootname = device_xname(bdv);
478 len = strlen(bootname);
479
480 for (int col = 0; col < r->numCol; col++) {
481 const char *devname = r->Disks[col].devname;
482 devname += sizeof("/dev/") - 1;
483 if (strncmp(devname, "dk", 2) == 0) {
484 const char *parent =
485 dkwedge_get_parent_name(r->Disks[col].dev);
486 if (parent != NULL)
487 devname = parent;
488 }
489 if (strncmp(devname, bootname, len) == 0) {
490 struct raid_softc *sc = r->softc;
491 aprint_debug("raid%d includes boot device %s\n",
492 sc->sc_unit, devname);
493 return 1;
494 }
495 }
496 return 0;
497 }
498
499 void
500 rf_buildroothack(RF_ConfigSet_t *config_sets)
501 {
502 RF_ConfigSet_t *cset;
503 RF_ConfigSet_t *next_cset;
504 int num_root;
505 struct raid_softc *sc, *rsc;
506 struct dk_softc *dksc;
507
508 sc = rsc = NULL;
509 num_root = 0;
510 cset = config_sets;
511 while (cset != NULL) {
512 next_cset = cset->next;
513 if (rf_have_enough_components(cset) &&
514 cset->ac->clabel->autoconfigure == 1) {
515 sc = rf_auto_config_set(cset);
516 if (sc != NULL) {
517 aprint_debug("raid%d: configured ok, rootable %d\n",
518 sc->sc_unit, cset->rootable);
519 if (cset->rootable) {
520 rsc = sc;
521 num_root++;
522 }
523 } else {
524 /* The autoconfig didn't work :( */
525 aprint_debug("Autoconfig failed\n");
526 rf_release_all_vps(cset);
527 }
528 } else {
529 /* we're not autoconfiguring this set...
530 release the associated resources */
531 rf_release_all_vps(cset);
532 }
533 /* cleanup */
534 rf_cleanup_config_set(cset);
535 cset = next_cset;
536 }
537 dksc = &rsc->sc_dksc;
538
539 /* if the user has specified what the root device should be
540 then we don't touch booted_device or boothowto... */
541
542 if (rootspec != NULL) {
543 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
544 return;
545 }
546
547 /* we found something bootable... */
548
549 /*
550 * XXX: The following code assumes that the root raid
551 * is the first ('a') partition. This is about the best
552 * we can do with a BSD disklabel, but we might be able
553 * to do better with a GPT label, by setting a specified
554 * attribute to indicate the root partition. We can then
555 * stash the partition number in the r->root_partition
556 * high bits (the bottom 2 bits are already used). For
557 * now we just set booted_partition to 0 when we override
558 * root.
559 */
560 if (num_root == 1) {
561 device_t candidate_root;
562 if (dksc->sc_dkdev.dk_nwedges != 0) {
563 char cname[sizeof(cset->ac->devname)];
564 /* XXX: assume partition 'a' first */
565 snprintf(cname, sizeof(cname), "%s%c",
566 device_xname(dksc->sc_dev), 'a');
567 candidate_root = dkwedge_find_by_wname(cname);
568 DPRINTF("%s: candidate wedge root=%s\n", __func__,
569 cname);
570 if (candidate_root == NULL) {
571 /*
572 * If that is not found, because we don't use
573 * disklabel, return the first dk child
574 * XXX: we can skip the 'a' check above
575 * and always do this...
576 */
577 size_t i = 0;
578 candidate_root = dkwedge_find_by_parent(
579 device_xname(dksc->sc_dev), &i);
580 }
581 DPRINTF("%s: candidate wedge root=%p\n", __func__,
582 candidate_root);
583 } else
584 candidate_root = dksc->sc_dev;
585 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
586 DPRINTF("%s: booted_device=%p root_partition=%d "
587 "contains_boot=%d",
588 __func__, booted_device, rsc->sc_r.root_partition,
589 rf_containsboot(&rsc->sc_r, booted_device));
590 /* XXX the check for booted_device == NULL can probably be
591 * dropped, now that rf_containsboot handles that case.
592 */
593 if (booted_device == NULL ||
594 rsc->sc_r.root_partition == 1 ||
595 rf_containsboot(&rsc->sc_r, booted_device)) {
596 booted_device = candidate_root;
597 booted_method = "raidframe/single";
598 booted_partition = 0; /* XXX assume 'a' */
599 }
600 } else if (num_root > 1) {
601 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
602 booted_device);
603
604 /*
605 * Maybe the MD code can help. If it cannot, then
606 * setroot() will discover that we have no
607 * booted_device and will ask the user if nothing was
608 * hardwired in the kernel config file
609 */
610 if (booted_device == NULL)
611 return;
612
613 num_root = 0;
614 mutex_enter(&raid_lock);
615 LIST_FOREACH(sc, &raids, sc_link) {
616 RF_Raid_t *r = &sc->sc_r;
617 if (r->valid == 0)
618 continue;
619
620 if (r->root_partition == 0)
621 continue;
622
623 if (rf_containsboot(r, booted_device)) {
624 num_root++;
625 rsc = sc;
626 dksc = &rsc->sc_dksc;
627 }
628 }
629 mutex_exit(&raid_lock);
630
631 if (num_root == 1) {
632 booted_device = dksc->sc_dev;
633 booted_method = "raidframe/multi";
634 booted_partition = 0; /* XXX assume 'a' */
635 } else {
636 /* we can't guess.. require the user to answer... */
637 boothowto |= RB_ASKNAME;
638 }
639 }
640 }
641
642 static int
643 raidsize(dev_t dev)
644 {
645 struct raid_softc *rs;
646 struct dk_softc *dksc;
647 unsigned int unit;
648
649 unit = raidunit(dev);
650 if ((rs = raidget(unit, false)) == NULL)
651 return -1;
652 dksc = &rs->sc_dksc;
653
654 if ((rs->sc_flags & RAIDF_INITED) == 0)
655 return -1;
656
657 return dk_size(dksc, dev);
658 }
659
660 static int
661 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
662 {
663 unsigned int unit;
664 struct raid_softc *rs;
665 struct dk_softc *dksc;
666
667 unit = raidunit(dev);
668 if ((rs = raidget(unit, false)) == NULL)
669 return ENXIO;
670 dksc = &rs->sc_dksc;
671
672 if ((rs->sc_flags & RAIDF_INITED) == 0)
673 return ENODEV;
674
675 /*
676 Note that blkno is relative to this particular partition.
677 By adding adding RF_PROTECTED_SECTORS, we get a value that
678 is relative to the partition used for the underlying component.
679 */
680 blkno += RF_PROTECTED_SECTORS;
681
682 return dk_dump(dksc, dev, blkno, va, size);
683 }
684
/*
 * Dump nblk blocks at blkno onto a single live component of the set.
 * Only RAID 1 shapes (one data column, one parity column) are
 * supported, since only there does one component carry a complete
 * copy of the data.  Holds the unit lock for the duration.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	/* First pass: take the first optimal (live) column, if any. */
	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare stands in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Hand the dump to the chosen component's block driver. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
790
791 /* ARGSUSED */
792 static int
793 raidopen(dev_t dev, int flags, int fmt,
794 struct lwp *l)
795 {
796 int unit = raidunit(dev);
797 struct raid_softc *rs;
798 struct dk_softc *dksc;
799 int error = 0;
800 int part, pmask;
801
802 if ((rs = raidget(unit, true)) == NULL)
803 return ENXIO;
804 if ((error = raidlock(rs)) != 0)
805 return (error);
806
807 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
808 error = EBUSY;
809 goto bad;
810 }
811
812 dksc = &rs->sc_dksc;
813
814 part = DISKPART(dev);
815 pmask = (1 << part);
816
817 if (!DK_BUSY(dksc, pmask) &&
818 ((rs->sc_flags & RAIDF_INITED) != 0)) {
819 /* First one... mark things as dirty... Note that we *MUST*
820 have done a configure before this. I DO NOT WANT TO BE
821 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
822 THAT THEY BELONG TOGETHER!!!!! */
823 /* XXX should check to see if we're only open for reading
824 here... If so, we needn't do this, but then need some
825 other way of keeping track of what's happened.. */
826
827 rf_markalldirty(&rs->sc_r);
828 }
829
830 if ((rs->sc_flags & RAIDF_INITED) != 0)
831 error = dk_open(dksc, dev, flags, fmt, l);
832
833 bad:
834 raidunlock(rs);
835
836 return (error);
837
838
839 }
840
841 static int
842 raid_lastclose(device_t self)
843 {
844 struct raid_softc *rs = raidsoftc(self);
845
846 /* Last one... device is not unconfigured yet.
847 Device shutdown has taken care of setting the
848 clean bits if RAIDF_INITED is not set
849 mark things as clean... */
850
851 rf_update_component_labels(&rs->sc_r,
852 RF_FINAL_COMPONENT_UPDATE);
853
854 /* pass to unlocked code */
855 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
856 rs->sc_flags |= RAIDF_DETACH;
857
858 return 0;
859 }
860
861 /* ARGSUSED */
862 static int
863 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
864 {
865 int unit = raidunit(dev);
866 struct raid_softc *rs;
867 struct dk_softc *dksc;
868 cfdata_t cf;
869 int error = 0, do_detach = 0, do_put = 0;
870
871 if ((rs = raidget(unit, false)) == NULL)
872 return ENXIO;
873 dksc = &rs->sc_dksc;
874
875 if ((error = raidlock(rs)) != 0)
876 return (error);
877
878 if ((rs->sc_flags & RAIDF_INITED) != 0) {
879 error = dk_close(dksc, dev, flags, fmt, l);
880 if ((rs->sc_flags & RAIDF_DETACH) != 0)
881 do_detach = 1;
882 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
883 do_put = 1;
884
885 raidunlock(rs);
886
887 if (do_detach) {
888 /* free the pseudo device attach bits */
889 cf = device_cfdata(dksc->sc_dev);
890 error = config_detach(dksc->sc_dev, 0);
891 if (error == 0)
892 free(cf, M_RAIDFRAME);
893 } else if (do_put) {
894 raidput(rs);
895 }
896
897 return (error);
898
899 }
900
/*
 * Signal iodone_cv under its lock so anything sleeping on it (waiting
 * for completed I/O or more queued work) re-checks its condition.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
908
909 static void
910 raidstrategy(struct buf *bp)
911 {
912 unsigned int unit;
913 struct raid_softc *rs;
914 struct dk_softc *dksc;
915 RF_Raid_t *raidPtr;
916
917 unit = raidunit(bp->b_dev);
918 if ((rs = raidget(unit, false)) == NULL) {
919 bp->b_error = ENXIO;
920 goto fail;
921 }
922 if ((rs->sc_flags & RAIDF_INITED) == 0) {
923 bp->b_error = ENXIO;
924 goto fail;
925 }
926 dksc = &rs->sc_dksc;
927 raidPtr = &rs->sc_r;
928
929 /* Queue IO only */
930 if (dk_strategy_defer(dksc, bp))
931 goto done;
932
933 /* schedule the IO to happen at the next convenient time */
934 raid_wakeup(raidPtr);
935
936 done:
937 return;
938
939 fail:
940 bp->b_resid = bp->b_bcount;
941 biodone(bp);
942 }
943
944 static int
945 raid_diskstart(device_t dev, struct buf *bp)
946 {
947 struct raid_softc *rs = raidsoftc(dev);
948 RF_Raid_t *raidPtr;
949
950 raidPtr = &rs->sc_r;
951 if (!raidPtr->valid) {
952 db1_printf(("raid is not valid..\n"));
953 return ENODEV;
954 }
955
956 /* XXX */
957 bp->b_resid = 0;
958
959 return raiddoaccess(raidPtr, bp);
960 }
961
962 void
963 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
964 {
965 struct raid_softc *rs;
966 struct dk_softc *dksc;
967
968 rs = raidPtr->softc;
969 dksc = &rs->sc_dksc;
970
971 dk_done(dksc, bp);
972
973 rf_lock_mutex2(raidPtr->mutex);
974 raidPtr->openings++;
975 rf_unlock_mutex2(raidPtr->mutex);
976
977 /* schedule more IO */
978 raid_wakeup(raidPtr);
979 }
980
981 /* ARGSUSED */
982 static int
983 raidread(dev_t dev, struct uio *uio, int flags)
984 {
985 int unit = raidunit(dev);
986 struct raid_softc *rs;
987
988 if ((rs = raidget(unit, false)) == NULL)
989 return ENXIO;
990
991 if ((rs->sc_flags & RAIDF_INITED) == 0)
992 return (ENXIO);
993
994 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
995
996 }
997
998 /* ARGSUSED */
999 static int
1000 raidwrite(dev_t dev, struct uio *uio, int flags)
1001 {
1002 int unit = raidunit(dev);
1003 struct raid_softc *rs;
1004
1005 if ((rs = raidget(unit, false)) == NULL)
1006 return ENXIO;
1007
1008 if ((rs->sc_flags & RAIDF_INITED) == 0)
1009 return (ENXIO);
1010
1011 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1012
1013 }
1014
/*
 * Do the actual detach work for a unit: shut down the RAID engine,
 * drain and free the buffer queue, and detach the disk.  Caller holds
 * the unit lock.  Returns EBUSY while the unit is open or has a
 * reconstruction, parity rewrite, or copyback in flight.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while open or while background operations run. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Never configured: nothing to undo. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/*
	 * Drop the pending-shutdown request up front — presumably so
	 * that if rf_Shutdown() fails below, the unit remains in a
	 * normal configured state rather than half shut down.
	 * NOTE(review): confirm this intent against the callers.
	 */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1052
/*
 * raidioctl: ioctl entry point for the RAIDframe pseudo-device.
 *
 * Handles RAIDframe-specific commands (configure, shutdown, rebuild,
 * parity operations, component-label management, status queries) and
 * falls through to dk_ioctl() for generic disk ioctls.  Compat hooks
 * for old ioctl ABIs are consulted first; see the flow comments below.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	/* Look up the softc for this unit; do not create one. */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef RAID_COMPAT32
	case RAIDFRAME_GET_INFO32:
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	/*
	 * Handle compat ioctl calls
	 *
	 * * If compat code is not loaded, stub returns ENOSYS and we just
	 *   check the "native" cmd's
	 * * If compat code is loaded but does not recognize the cmd, it
	 *   returns EPASSTHROUGH, and we just check the "native" cmd's
	 * * If compat code returns EAGAIN, we need to finish via config
	 * * Otherwise the cmd has been handled and we just return
	 */
	module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
	MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
	    (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
	    enosys(), retcode);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;	/* compat hook filled in k_cfg for us */
	else if (retcode != EPASSTHROUGH)
		return retcode;

	module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
	MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
	    (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
	    enosys(), retcode);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;	/* compat hook filled in k_cfg for us */
	else if (retcode != EPASSTHROUGH)
		return retcode;

	/*
	 * XXX
	 * Handling of FAIL_DISK80 command requires us to retain retcode's
	 * value of EPASSTHROUGH.  If you add more compat code later, make
	 * sure you don't overwrite retcode and break this!
	 */

	switch (cmd) {

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef RAID_COMPAT32
	case RAIDFRAME_CONFIGURE32:
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef RAID_COMPAT32
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			MODULE_CALL_HOOK(raidframe_netbsd32_config_hook,
			    (data, k_cfg), enosys(), retcode);
		else
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/*
		 * Common configuration path, also entered via goto from the
		 * compat hooks above (which then own filling in k_cfg).
		 */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse to shut down while busy or mid-operation; the
		 * actual teardown happens on last close via RAIDF_SHUTDOWN. */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif /* 0 */

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		/* Stamp a fresh label onto every live component. */
		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own thread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): not implemented -- returns success (0)
		 * without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the component state under the raid mutex. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* Request block is freed by the reconstruction thread. */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef RAID_COMPAT32
	case RAIDFRAME_GET_INFO32:
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			/* Copy out to the user's pointer; 32-bit callers
			 * hand us a netbsd32 pointer that must be widened. */
#ifdef RAID_COMPAT32
			if (raidframe_netbsd32_config_hook.hooked &&
			    cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK80:
		/* Check if we called compat code for this cmd */
		if (retcode != EPASSTHROUGH)
			return EINVAL;
		/* FALLTHRU */
	case RAIDFRAME_FAIL_DISK:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		 * here (unlike the other thread-spawn cases, which return
		 * retcode) -- confirm whether that is intentional. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* Only allowed when every component is healthy; the new
		 * last_unit is written into every component label. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (cf. the SEND_SPARET case below);
		 * harmless while this block is #if 0'd out. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		/* Anything else is handled by the generic disk layer. */
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1791
1792
1793 /* raidinit -- complete the rest of the initialization for the
1794 RAIDframe device. */
1795
1796
/*
 * raidinit: complete device initialization after a successful
 * rf_Configure().  Attaches a pseudo-device instance, wires up the
 * dk(9)/disk(9) layers, allocates the buffer queue, marks the unit
 * usable (RAIDF_INITED) and kicks off wedge discovery.
 *
 * On config_attach_pseudo() failure the unit is simply left
 * uninitialized (no error is propagated to the caller).
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* First-come-first-served queue, sorted by raw block number. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usuable */
	rs->sc_flags |= RAIDF_INITED;

	/* Scan for wedges (GPT partitions etc.) on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1852
1853 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1854 /* wake up the daemon & tell it to get us a spare table
1855 * XXX
1856 * the entries in the queues should be tagged with the raidPtr
1857 * so that in the extremely rare case that two recons happen at once,
1858 * we know for which device were requesting a spare table
1859 * XXX
1860 *
1861 * XXX This code is not currently used. GO
1862 */
1863 int
1864 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1865 {
1866 int retcode;
1867
1868 rf_lock_mutex2(rf_sparet_wait_mutex);
1869 req->next = rf_sparet_wait_queue;
1870 rf_sparet_wait_queue = req;
1871 rf_broadcast_cond2(rf_sparet_wait_cv);
1872
1873 /* mpsleep unlocks the mutex */
1874 while (!rf_sparet_resp_queue) {
1875 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1876 }
1877 req = rf_sparet_resp_queue;
1878 rf_sparet_resp_queue = req->next;
1879 rf_unlock_mutex2(rf_sparet_wait_mutex);
1880
1881 retcode = req->fcol;
1882 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1883 * alloc'd */
1884 return (retcode);
1885 }
1886 #endif
1887
1888 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1889 * bp & passes it down.
1890 * any calls originating in the kernel must use non-blocking I/O
1891 * do some extra sanity checking to return "appropriate" error values for
1892 * certain conditions (to make some standard utilities work)
1893 *
1894 * Formerly known as: rf_DoAccessKernel
1895 */
1896 void
1897 raidstart(RF_Raid_t *raidPtr)
1898 {
1899 struct raid_softc *rs;
1900 struct dk_softc *dksc;
1901
1902 rs = raidPtr->softc;
1903 dksc = &rs->sc_dksc;
1904 /* quick check to see if anything has died recently */
1905 rf_lock_mutex2(raidPtr->mutex);
1906 if (raidPtr->numNewFailures > 0) {
1907 rf_unlock_mutex2(raidPtr->mutex);
1908 rf_update_component_labels(raidPtr,
1909 RF_NORMAL_COMPONENT_UPDATE);
1910 rf_lock_mutex2(raidPtr->mutex);
1911 raidPtr->numNewFailures--;
1912 }
1913 rf_unlock_mutex2(raidPtr->mutex);
1914
1915 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1916 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1917 return;
1918 }
1919
1920 dk_start(dksc, NULL);
1921 }
1922
1923 static int
1924 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1925 {
1926 RF_SectorCount_t num_blocks, pb, sum;
1927 RF_RaidAddr_t raid_addr;
1928 daddr_t blocknum;
1929 int do_async;
1930 int rc;
1931
1932 rf_lock_mutex2(raidPtr->mutex);
1933 if (raidPtr->openings == 0) {
1934 rf_unlock_mutex2(raidPtr->mutex);
1935 return EAGAIN;
1936 }
1937 rf_unlock_mutex2(raidPtr->mutex);
1938
1939 blocknum = bp->b_rawblkno;
1940
1941 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1942 (int) blocknum));
1943
1944 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1945 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1946
1947 /* *THIS* is where we adjust what block we're going to...
1948 * but DO NOT TOUCH bp->b_blkno!!! */
1949 raid_addr = blocknum;
1950
1951 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1952 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1953 sum = raid_addr + num_blocks + pb;
1954 if (1 || rf_debugKernelAccess) {
1955 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1956 (int) raid_addr, (int) sum, (int) num_blocks,
1957 (int) pb, (int) bp->b_resid));
1958 }
1959 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1960 || (sum < num_blocks) || (sum < pb)) {
1961 rc = ENOSPC;
1962 goto done;
1963 }
1964 /*
1965 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1966 */
1967
1968 if (bp->b_bcount & raidPtr->sectorMask) {
1969 rc = ENOSPC;
1970 goto done;
1971 }
1972 db1_printf(("Calling DoAccess..\n"));
1973
1974
1975 rf_lock_mutex2(raidPtr->mutex);
1976 raidPtr->openings--;
1977 rf_unlock_mutex2(raidPtr->mutex);
1978
1979 /*
1980 * Everything is async.
1981 */
1982 do_async = 1;
1983
1984 /* don't ever condition on bp->b_flags & B_WRITE.
1985 * always condition on B_READ instead */
1986
1987 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1988 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1989 do_async, raid_addr, num_blocks,
1990 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1991
1992 done:
1993 return rc;
1994 }
1995
1996 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1997
/*
 * rf_DispatchKernelIO: issue one RAIDframe disk-queue request to the
 * underlying component device.  NOP requests are completed immediately
 * via KernelWakeupFunc(); reads and writes are set up with InitBP()
 * and handed to bdev_strategy().  Called with the queue mutex held;
 * the mutex is dropped around bdev_strategy() (see comment below).
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): bare printf with doubled parens -- looks
		 * like it was meant to be db1_printf; harmless as-is. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately through the normal
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device; completion
		 * comes back through KernelWakeupFunc(). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2071 /* this is the callback function associated with a I/O invoked from
2072 kernel code.
2073 */
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone time.  Records the I/O error (possibly marking the
   component as failed), moves the request onto the raid set's iodone
   queue and signals the raidio thread.  All bookkeeping is done under
   raidPtr->iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP()
	 * (or by the NOP path in rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a component-label update
			 * on the next raidstart(). */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2140
2141
2142 /*
2143 * initialize a buf structure for doing an I/O in the kernel.
2144 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	/* transfer length in bytes: sector count scaled by sector size */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* b_blkno is in DEV_BSIZE units: sectors -> bytes -> device blocks */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	/* cbFunc is invoked from biodone() when the I/O completes */
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2169
2170 /*
2171 * Wait interruptibly for an exclusive lock.
2172 *
2173 * XXX
2174 * Several drivers do this; it should be abstracted and made MP-safe.
2175 * (Hmm... where have we seen this warning before :-> GO )
2176 */
2177 static int
2178 raidlock(struct raid_softc *rs)
2179 {
2180 int error;
2181
2182 error = 0;
2183 mutex_enter(&rs->sc_mutex);
2184 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2185 rs->sc_flags |= RAIDF_WANTED;
2186 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2187 if (error != 0)
2188 goto done;
2189 }
2190 rs->sc_flags |= RAIDF_LOCKED;
2191 done:
2192 mutex_exit(&rs->sc_mutex);
2193 return (error);
2194 }
2195 /*
2196 * Unlock and wake up any waiters.
2197 */
2198 static void
2199 raidunlock(struct raid_softc *rs)
2200 {
2201
2202 mutex_enter(&rs->sc_mutex);
2203 rs->sc_flags &= ~RAIDF_LOCKED;
2204 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2205 rs->sc_flags &= ~RAIDF_WANTED;
2206 cv_broadcast(&rs->sc_cv);
2207 }
2208 mutex_exit(&rs->sc_mutex);
2209 }
2210
2211
2212 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2213 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2214 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2215
2216 static daddr_t
2217 rf_component_info_offset(void)
2218 {
2219
2220 return RF_COMPONENT_INFO_OFFSET;
2221 }
2222
2223 static daddr_t
2224 rf_component_info_size(unsigned secsize)
2225 {
2226 daddr_t info_size;
2227
2228 KASSERT(secsize);
2229 if (secsize > RF_COMPONENT_INFO_SIZE)
2230 info_size = secsize;
2231 else
2232 info_size = RF_COMPONENT_INFO_SIZE;
2233
2234 return info_size;
2235 }
2236
2237 static daddr_t
2238 rf_parity_map_offset(RF_Raid_t *raidPtr)
2239 {
2240 daddr_t map_offset;
2241
2242 KASSERT(raidPtr->bytesPerSector);
2243 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2244 map_offset = raidPtr->bytesPerSector;
2245 else
2246 map_offset = RF_COMPONENT_INFO_SIZE;
2247 map_offset += rf_component_info_offset();
2248
2249 return map_offset;
2250 }
2251
2252 static daddr_t
2253 rf_parity_map_size(RF_Raid_t *raidPtr)
2254 {
2255 daddr_t map_size;
2256
2257 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2258 map_size = raidPtr->bytesPerSector;
2259 else
2260 map_size = RF_PARITY_MAP_SIZE;
2261
2262 return map_size;
2263 }
2264
2265 int
2266 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2267 {
2268 RF_ComponentLabel_t *clabel;
2269
2270 clabel = raidget_component_label(raidPtr, col);
2271 clabel->clean = RF_RAID_CLEAN;
2272 raidflush_component_label(raidPtr, col);
2273 return(0);
2274 }
2275
2276
2277 int
2278 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2279 {
2280 RF_ComponentLabel_t *clabel;
2281
2282 clabel = raidget_component_label(raidPtr, col);
2283 clabel->clean = RF_RAID_DIRTY;
2284 raidflush_component_label(raidPtr, col);
2285 return(0);
2286 }
2287
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/*
	 * Read the on-disk component label for column `col' into the
	 * in-core copy kept in raid_cinfo[col].ci_label.
	 */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2297
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return a pointer to the in-core label; no I/O is performed. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2303
2304 int
2305 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2306 {
2307 RF_ComponentLabel_t *label;
2308
2309 label = &raidPtr->raid_cinfo[col].ci_label;
2310 label->mod_counter = raidPtr->mod_counter;
2311 #ifndef RF_NO_PARITY_MAP
2312 label->parity_map_modcount = label->mod_counter;
2313 #endif
2314 return raidwrite_component_label(raidPtr->bytesPerSector,
2315 raidPtr->Disks[col].dev,
2316 raidPtr->raid_cinfo[col].ci_vp, label);
2317 }
2318
2319
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/*
	 * Thin wrapper: read sizeof(RF_ComponentLabel_t) bytes of label
	 * data from the component-info area of the device.
	 */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2329
2330 /* ARGSUSED */
2331 static int
2332 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2333 size_t msize, daddr_t offset, daddr_t dsize)
2334 {
2335 struct buf *bp;
2336 int error;
2337
2338 /* XXX should probably ensure that we don't try to do this if
2339 someone has changed rf_protected_sectors. */
2340
2341 if (b_vp == NULL) {
2342 /* For whatever reason, this component is not valid.
2343 Don't try to read a component label from it. */
2344 return(EINVAL);
2345 }
2346
2347 /* get a block of the appropriate size... */
2348 bp = geteblk((int)dsize);
2349 bp->b_dev = dev;
2350
2351 /* get our ducks in a row for the read */
2352 bp->b_blkno = offset / DEV_BSIZE;
2353 bp->b_bcount = dsize;
2354 bp->b_flags |= B_READ;
2355 bp->b_resid = dsize;
2356
2357 bdev_strategy(bp);
2358 error = biowait(bp);
2359
2360 if (!error) {
2361 memcpy(data, bp->b_data, msize);
2362 }
2363
2364 brelse(bp, 0);
2365 return(error);
2366 }
2367
2368
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/*
	 * Thin wrapper: synchronously (asyncp == 0) write the label into
	 * the component-info area of the device.
	 */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2378
2379 /* ARGSUSED */
/*
 * Write `msize' bytes from `data' into the `dsize'-byte region that
 * begins at byte `offset' on the component; the remainder of the region
 * is zero-filled.  With `asyncp' set the write is issued B_ASYNC and 0
 * is returned without waiting for completion.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* zero-fill so the bytes past `msize' are deterministic on disk */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): the buffer is not brelse'd on this path;
		   presumably the async completion releases it — verify. */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2413
2414 void
2415 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2416 {
2417 int c;
2418
2419 for (c = 0; c < raidPtr->numCol; c++) {
2420 /* Skip dead disks. */
2421 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2422 continue;
2423 /* XXXjld: what if an error occurs here? */
2424 raidwrite_component_area(raidPtr->Disks[c].dev,
2425 raidPtr->raid_cinfo[c].ci_vp, map,
2426 RF_PARITYMAP_NBYTE,
2427 rf_parity_map_offset(raidPtr),
2428 rf_parity_map_size(raidPtr), 0);
2429 }
2430 }
2431
2432 void
2433 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2434 {
2435 struct rf_paritymap_ondisk tmp;
2436 int c,first;
2437
2438 first=1;
2439 for (c = 0; c < raidPtr->numCol; c++) {
2440 /* Skip dead disks. */
2441 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2442 continue;
2443 raidread_component_area(raidPtr->Disks[c].dev,
2444 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2445 RF_PARITYMAP_NBYTE,
2446 rf_parity_map_offset(raidPtr),
2447 rf_parity_map_size(raidPtr));
2448 if (first) {
2449 memcpy(map, &tmp, sizeof(*map));
2450 first = 0;
2451 } else {
2452 rf_paritymap_merge(map, &tmp);
2453 }
2454 }
2455 }
2456
/*
 * Mark every live component (and every in-use spare) dirty, bumping the
 * set's modification counter first so these label writes supersede any
 * older labels on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			/* NOTE(review): if no column claims this spare,
			   scol retains its previous value (-1 initially) —
			   verify that cannot happen for a used spare. */
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2516
2517
/*
 * Rewrite the component labels of all optimal components and all in-use
 * spares with a fresh modification counter.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and the parity is known good, the clean bit
 * is set as well (this is the normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	/* first pass: the regular (non-spare) columns */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	/* second pass: spares that have been reconstructed onto */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2595
2596 void
2597 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2598 {
2599
2600 if (vp != NULL) {
2601 if (auto_configured == 1) {
2602 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2603 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2604 vput(vp);
2605
2606 } else {
2607 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2608 }
2609 }
2610 }
2611
2612
2613 void
2614 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2615 {
2616 int r,c;
2617 struct vnode *vp;
2618 int acd;
2619
2620
2621 /* We take this opportunity to close the vnodes like we should.. */
2622
2623 for (c = 0; c < raidPtr->numCol; c++) {
2624 vp = raidPtr->raid_cinfo[c].ci_vp;
2625 acd = raidPtr->Disks[c].auto_configured;
2626 rf_close_component(raidPtr, vp, acd);
2627 raidPtr->raid_cinfo[c].ci_vp = NULL;
2628 raidPtr->Disks[c].auto_configured = 0;
2629 }
2630
2631 for (r = 0; r < raidPtr->numSpare; r++) {
2632 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2633 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2634 rf_close_component(raidPtr, vp, acd);
2635 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2636 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2637 }
2638 }
2639
2640
/*
 * Kernel-thread body: fail the requested component and, when
 * RF_FDFLAGS_RECON is set, reconstruct its contents onto a spare.
 * The request structure was allocated by our creator and is freed here.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2662
/*
 * Kernel-thread body: rewrite all parity for the set; on success mark
 * the in-core parity state clean so shutdown can set the clean bit in
 * the component labels.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2695
2696
/*
 * Kernel-thread body: copy reconstructed data back from the spare to
 * the (repaired) component it stood in for.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2711
2712
/*
 * Kernel-thread body: rebuild the named column onto itself (e.g. after
 * the failed disk was physically replaced in place).  The request
 * structure was allocated by our creator and is freed here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2730
/*
 * Try to read a component label from the open device `dev'/`vp'.
 * If the label is plausible, prepend a new RF_AutoConfig_t describing
 * the component to `ac_list' (the vnode stays open and is owned by the
 * list entry).  Otherwise the vnode is closed and released.  Returns
 * the (possibly extended) list, or NULL if memory ran out — in which
 * case the entire list has been torn down.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
 oomem:
		/* out of memory: free everything collected so far */
		/* NOTE(review): vp is not closed/released on this path —
		   looks like a vnode leak; verify against later revisions. */
		    while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* prepend to the list; ownership of vp and clabel
			   passes to the list entry */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2788
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels, returning a list of all plausible components found.  Wedges
 * are scanned before whole disks/partitions so that a wedge covering a
 * disk is preferred over the disk's raw partition.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only wedges explicitly typed as RAIDframe count */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes over vp on success */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2992
2993
2994 int
2995 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2996 {
2997
2998 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2999 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3000 ((clabel->clean == RF_RAID_CLEAN) ||
3001 (clabel->clean == RF_RAID_DIRTY)) &&
3002 clabel->row >=0 &&
3003 clabel->column >= 0 &&
3004 clabel->num_rows > 0 &&
3005 clabel->num_columns > 0 &&
3006 clabel->row < clabel->num_rows &&
3007 clabel->column < clabel->num_columns &&
3008 clabel->blockSize > 0 &&
3009 /*
3010 * numBlocksHi may contain garbage, but it is ok since
3011 * the type is unsigned. If it is really garbage,
3012 * rf_fix_old_label_size() will fix it.
3013 */
3014 rf_component_label_numblocks(clabel) > 0) {
3015 /*
3016 * label looks reasonable enough...
3017 * let's make sure it has no old garbage.
3018 */
3019 if (numsecs)
3020 rf_fix_old_label_size(clabel, numsecs);
3021 return(1);
3022 }
3023 return(0);
3024 }
3025
3026
3027 /*
3028 * For reasons yet unknown, some old component labels have garbage in
3029 * the newer numBlocksHi region, and this causes lossage. Since those
3030 * disks will also have numsecs set to less than 32 bits of sectors,
3031 * we can determine when this corruption has occurred, and fix it.
3032 *
3033 * The exact same problem, with the same unknown reason, happens to
3034 * the partitionSizeHi member as well.
3035 */
3036 static void
3037 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3038 {
3039
3040 if (numsecs < ((uint64_t)1 << 32)) {
3041 if (clabel->numBlocksHi) {
3042 printf("WARNING: total sectors < 32 bits, yet "
3043 "numBlocksHi set\n"
3044 "WARNING: resetting numBlocksHi to zero.\n");
3045 clabel->numBlocksHi = 0;
3046 }
3047
3048 if (clabel->partitionSizeHi) {
3049 printf("WARNING: total sectors < 32 bits, yet "
3050 "partitionSizeHi set\n"
3051 "WARNING: resetting partitionSizeHi to zero.\n");
3052 clabel->partitionSizeHi = 0;
3053 }
3054 }
3055 }
3056
3057
#ifdef DEBUG
/* Dump a component label in human-readable form (debug kernels only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* names for the root_partition field, indexed by its low 2 bits */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3091
3092 RF_ConfigSet_t *
3093 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3094 {
3095 RF_AutoConfig_t *ac;
3096 RF_ConfigSet_t *config_sets;
3097 RF_ConfigSet_t *cset;
3098 RF_AutoConfig_t *ac_next;
3099
3100
3101 config_sets = NULL;
3102
3103 /* Go through the AutoConfig list, and figure out which components
3104 belong to what sets. */
3105 ac = ac_list;
3106 while(ac!=NULL) {
3107 /* we're going to putz with ac->next, so save it here
3108 for use at the end of the loop */
3109 ac_next = ac->next;
3110
3111 if (config_sets == NULL) {
3112 /* will need at least this one... */
3113 config_sets = (RF_ConfigSet_t *)
3114 malloc(sizeof(RF_ConfigSet_t),
3115 M_RAIDFRAME, M_NOWAIT);
3116 if (config_sets == NULL) {
3117 panic("rf_create_auto_sets: No memory!");
3118 }
3119 /* this one is easy :) */
3120 config_sets->ac = ac;
3121 config_sets->next = NULL;
3122 config_sets->rootable = 0;
3123 ac->next = NULL;
3124 } else {
3125 /* which set does this component fit into? */
3126 cset = config_sets;
3127 while(cset!=NULL) {
3128 if (rf_does_it_fit(cset, ac)) {
3129 /* looks like it matches... */
3130 ac->next = cset->ac;
3131 cset->ac = ac;
3132 break;
3133 }
3134 cset = cset->next;
3135 }
3136 if (cset==NULL) {
3137 /* didn't find a match above... new set..*/
3138 cset = (RF_ConfigSet_t *)
3139 malloc(sizeof(RF_ConfigSet_t),
3140 M_RAIDFRAME, M_NOWAIT);
3141 if (cset == NULL) {
3142 panic("rf_create_auto_sets: No memory!");
3143 }
3144 cset->ac = ac;
3145 ac->next = NULL;
3146 cset->next = config_sets;
3147 cset->rootable = 0;
3148 config_sets = cset;
3149 }
3150 }
3151 ac = ac_next;
3152 }
3153
3154
3155 return(config_sets);
3156 }
3157
3158 static int
3159 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3160 {
3161 RF_ComponentLabel_t *clabel1, *clabel2;
3162
3163 /* If this one matches the *first* one in the set, that's good
3164 enough, since the other members of the set would have been
3165 through here too... */
3166 /* note that we are not checking partitionSize here..
3167
3168 Note that we are also not checking the mod_counters here.
3169 If everything else matches except the mod_counter, that's
3170 good enough for this test. We will deal with the mod_counters
3171 a little later in the autoconfiguration process.
3172
3173 (clabel1->mod_counter == clabel2->mod_counter) &&
3174
3175 The reason we don't check for this is that failed disks
3176 will have lower modification counts. If those disks are
3177 not added to the set they used to belong to, then they will
3178 form their own set, which may result in 2 different sets,
3179 for example, competing to be configured at raid0, and
3180 perhaps competing to be the root filesystem set. If the
3181 wrong ones get configured, or both attempt to become /,
3182 weird behaviour and or serious lossage will occur. Thus we
3183 need to bring them into the fold here, and kick them out at
3184 a later point.
3185
3186 */
3187
3188 clabel1 = cset->ac->clabel;
3189 clabel2 = ac->clabel;
3190 if ((clabel1->version == clabel2->version) &&
3191 (clabel1->serial_number == clabel2->serial_number) &&
3192 (clabel1->num_rows == clabel2->num_rows) &&
3193 (clabel1->num_columns == clabel2->num_columns) &&
3194 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3195 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3196 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3197 (clabel1->parityConfig == clabel2->parityConfig) &&
3198 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3199 (clabel1->blockSize == clabel2->blockSize) &&
3200 rf_component_label_numblocks(clabel1) ==
3201 rf_component_label_numblocks(clabel2) &&
3202 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3203 (clabel1->root_partition == clabel2->root_partition) &&
3204 (clabel1->last_unit == clabel2->last_unit) &&
3205 (clabel1->config_order == clabel2->config_order)) {
3206 /* if it get's here, it almost *has* to be a match */
3207 } else {
3208 /* it's not consistent with somebody in the set..
3209 punt */
3210 return(0);
3211 }
3212 /* all was fine.. it must fit... */
3213 return(1);
3214 }
3215
/*
 * Decide whether the set has enough live components (at the newest
 * mod_counter) to be configured.  RAID 1 is special-cased: components
 * are mirrored in even/odd pairs, so the set only dies when both halves
 * of some pair are missing.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the set's authoritative mod_counter is the maximum seen */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (max mod_counter) component at col c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate exactly one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3318
3319 void
3320 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3321 RF_Raid_t *raidPtr)
3322 {
3323 RF_ComponentLabel_t *clabel;
3324 int i;
3325
3326 clabel = ac->clabel;
3327
3328 /* 1. Fill in the common stuff */
3329 config->numCol = clabel->num_columns;
3330 config->numSpare = 0; /* XXX should this be set here? */
3331 config->sectPerSU = clabel->sectPerSU;
3332 config->SUsPerPU = clabel->SUsPerPU;
3333 config->SUsPerRU = clabel->SUsPerRU;
3334 config->parityConfig = clabel->parityConfig;
3335 /* XXX... */
3336 strcpy(config->diskQueueType,"fifo");
3337 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3338 config->layoutSpecificSize = 0; /* XXX ?? */
3339
3340 while(ac!=NULL) {
3341 /* row/col values will be in range due to the checks
3342 in reasonable_label() */
3343 strcpy(config->devnames[0][ac->clabel->column],
3344 ac->devname);
3345 ac = ac->next;
3346 }
3347
3348 for(i=0;i<RF_MAXDBGV;i++) {
3349 config->debugVars[i][0] = 0;
3350 }
3351 }
3352
3353 int
3354 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3355 {
3356 RF_ComponentLabel_t *clabel;
3357 int column;
3358 int sparecol;
3359
3360 raidPtr->autoconfigure = new_value;
3361
3362 for(column=0; column<raidPtr->numCol; column++) {
3363 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3364 clabel = raidget_component_label(raidPtr, column);
3365 clabel->autoconfigure = new_value;
3366 raidflush_component_label(raidPtr, column);
3367 }
3368 }
3369 for(column = 0; column < raidPtr->numSpare ; column++) {
3370 sparecol = raidPtr->numCol + column;
3371 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3372 clabel = raidget_component_label(raidPtr, sparecol);
3373 clabel->autoconfigure = new_value;
3374 raidflush_component_label(raidPtr, sparecol);
3375 }
3376 }
3377 return(new_value);
3378 }
3379
3380 int
3381 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3382 {
3383 RF_ComponentLabel_t *clabel;
3384 int column;
3385 int sparecol;
3386
3387 raidPtr->root_partition = new_value;
3388 for(column=0; column<raidPtr->numCol; column++) {
3389 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3390 clabel = raidget_component_label(raidPtr, column);
3391 clabel->root_partition = new_value;
3392 raidflush_component_label(raidPtr, column);
3393 }
3394 }
3395 for(column = 0; column < raidPtr->numSpare ; column++) {
3396 sparecol = raidPtr->numCol + column;
3397 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3398 clabel = raidget_component_label(raidPtr, sparecol);
3399 clabel->root_partition = new_value;
3400 raidflush_component_label(raidPtr, sparecol);
3401 }
3402 }
3403 return(new_value);
3404 }
3405
3406 void
3407 rf_release_all_vps(RF_ConfigSet_t *cset)
3408 {
3409 RF_AutoConfig_t *ac;
3410
3411 ac = cset->ac;
3412 while(ac!=NULL) {
3413 /* Close the vp, and give it back */
3414 if (ac->vp) {
3415 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3416 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3417 vput(ac->vp);
3418 ac->vp = NULL;
3419 }
3420 ac = ac->next;
3421 }
3422 }
3423
3424
3425 void
3426 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3427 {
3428 RF_AutoConfig_t *ac;
3429 RF_AutoConfig_t *next_ac;
3430
3431 ac = cset->ac;
3432 while(ac!=NULL) {
3433 next_ac = ac->next;
3434 /* nuke the label */
3435 free(ac->clabel, M_RAIDFRAME);
3436 /* cleanup the config structure */
3437 free(ac, M_RAIDFRAME);
3438 /* "next.." */
3439 ac = next_ac;
3440 }
3441 /* and, finally, nuke the config set */
3442 free(cset, M_RAIDFRAME);
3443 }
3444
3445
/*
 * Initialize a component label from the current in-core state of the
 * RAID set: identity (serial number, mod counter), geometry, and the
 * administrative flags (autoconfigure, root preference, unit ordering).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Geometry of the set. num_rows is always 1 here. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* Let the parity map subsystem fill in its part of the label. */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3478
/*
 * Autoconfigure one config set: build an RF_Config_t from its component
 * labels, find (or create) a free raid unit, and configure it.  Returns
 * the configured softc, or NULL on failure.  Caller retains ownership
 * of cset; the temporary config is freed here on all paths.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/*
	 * Start at the label's preferred unit; walk upward past units
	 * that already hold a valid (configured) set.
	 */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing free unit found: allocate one at raidID. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; release the unit we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3562
3563 void
3564 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3565 size_t xmin, size_t xmax)
3566 {
3567 int error;
3568
3569 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3570 pool_sethiwat(p, xmax);
3571 if ((error = pool_prime(p, xmin)) != 0)
3572 panic("%s: failed to prime pool: %d", __func__, error);
3573 pool_setlowat(p, xmin);
3574 }
3575
3576 /*
3577 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3578 * to see if there is IO pending and if that IO could possibly be done
3579 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3580 * otherwise.
3581 *
3582 */
3583 int
3584 rf_buf_queue_check(RF_Raid_t *raidPtr)
3585 {
3586 struct raid_softc *rs;
3587 struct dk_softc *dksc;
3588
3589 rs = raidPtr->softc;
3590 dksc = &rs->sc_dksc;
3591
3592 if ((rs->sc_flags & RAIDF_INITED) == 0)
3593 return 1;
3594
3595 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3596 /* there is work to do */
3597 return 0;
3598 }
3599 /* default is nothing to do */
3600 return 1;
3601 }
3602
3603 int
3604 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3605 {
3606 uint64_t numsecs;
3607 unsigned secsize;
3608 int error;
3609
3610 error = getdisksize(vp, &numsecs, &secsize);
3611 if (error == 0) {
3612 diskPtr->blockSize = secsize;
3613 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3614 diskPtr->partitionSize = numsecs;
3615 return 0;
3616 }
3617 return error;
3618 }
3619
/* Autoconf match: RAIDframe pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3625
/*
 * Autoconf attach: nothing to do here; RAID sets are configured later
 * via ioctl or autoconfiguration.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3630
3631
3632 static int
3633 raid_detach(device_t self, int flags)
3634 {
3635 int error;
3636 struct raid_softc *rs = raidsoftc(self);
3637
3638 if (rs == NULL)
3639 return ENXIO;
3640
3641 if ((error = raidlock(rs)) != 0)
3642 return (error);
3643
3644 error = raid_detach_unlocked(rs);
3645
3646 raidunlock(rs);
3647
3648 /* XXX raid can be referenced here */
3649
3650 if (error)
3651 return error;
3652
3653 /* Free the softc */
3654 raidput(rs);
3655
3656 return 0;
3657 }
3658
/*
 * Publish a synthetic disk geometry for the RAID pseudo-disk derived
 * from the set's layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/*
	 * NOTE(review): sectors-per-track = data sectors per stripe and
	 * ntracks = 4 * numCol look like an arbitrary fake geometry with
	 * no physical meaning -- confirm before relying on it.
	 */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3674
3675 /*
3676 * Get cache info for all the components (including spares).
3677 * Returns intersection of all the cache flags of all disks, or first
3678 * error if any encountered.
3679 * XXXfua feature flags can change as spares are added - lock down somehow
3680 */
3681 static int
3682 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3683 {
3684 int c;
3685 int error;
3686 int dkwhole = 0, dkpart;
3687
3688 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3689 /*
3690 * Check any non-dead disk, even when currently being
3691 * reconstructed.
3692 */
3693 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3694 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3695 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3696 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3697 if (error) {
3698 if (error != ENODEV) {
3699 printf("raid%d: get cache for component %s failed\n",
3700 raidPtr->raidid,
3701 raidPtr->Disks[c].devname);
3702 }
3703
3704 return error;
3705 }
3706
3707 if (c == 0)
3708 dkwhole = dkpart;
3709 else
3710 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3711 }
3712 }
3713
3714 *data = dkwhole;
3715
3716 return 0;
3717 }
3718
3719 /*
3720 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3721 * We end up returning whatever error was returned by the first cache flush
3722 * that fails.
3723 */
3724
3725 int
3726 rf_sync_component_caches(RF_Raid_t *raidPtr)
3727 {
3728 int c, sparecol;
3729 int e,error;
3730 int force = 1;
3731
3732 error = 0;
3733 for (c = 0; c < raidPtr->numCol; c++) {
3734 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3735 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3736 &force, FWRITE, NOCRED);
3737 if (e) {
3738 if (e != ENODEV)
3739 printf("raid%d: cache flush to component %s failed.\n",
3740 raidPtr->raidid, raidPtr->Disks[c].devname);
3741 if (error == 0) {
3742 error = e;
3743 }
3744 }
3745 }
3746 }
3747
3748 for( c = 0; c < raidPtr->numSpare ; c++) {
3749 sparecol = raidPtr->numCol + c;
3750 /* Need to ensure that the reconstruct actually completed! */
3751 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3752 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3753 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3754 if (e) {
3755 if (e != ENODEV)
3756 printf("raid%d: cache flush to component %s failed.\n",
3757 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3758 if (error == 0) {
3759 error = e;
3760 }
3761 }
3762 }
3763 }
3764 return error;
3765 }
3766
3767 /* Fill in info with the current status */
3768 void
3769 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3770 {
3771
3772 if (raidPtr->status != rf_rs_reconstructing) {
3773 info->total = 100;
3774 info->completed = 100;
3775 } else {
3776 info->total = raidPtr->reconControl->numRUsTotal;
3777 info->completed = raidPtr->reconControl->numRUsComplete;
3778 }
3779 info->remaining = info->total - info->completed;
3780 }
3781
3782 /* Fill in info with the current status */
3783 void
3784 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3785 {
3786
3787 if (raidPtr->parity_rewrite_in_progress == 1) {
3788 info->total = raidPtr->Layout.numStripe;
3789 info->completed = raidPtr->parity_rewrite_stripes_done;
3790 } else {
3791 info->completed = 100;
3792 info->total = 100;
3793 }
3794 info->remaining = info->total - info->completed;
3795 }
3796
3797 /* Fill in info with the current status */
3798 void
3799 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3800 {
3801
3802 if (raidPtr->copyback_in_progress == 1) {
3803 info->total = raidPtr->Layout.numStripe;
3804 info->completed = raidPtr->copyback_stripes_done;
3805 info->remaining = info->total - info->completed;
3806 } else {
3807 info->remaining = 0;
3808 info->completed = 100;
3809 info->total = 100;
3810 }
3811 }
3812
3813 /* Fill in config with the current info */
3814 int
3815 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3816 {
3817 int d, i, j;
3818
3819 if (!raidPtr->valid)
3820 return (ENODEV);
3821 config->cols = raidPtr->numCol;
3822 config->ndevs = raidPtr->numCol;
3823 if (config->ndevs >= RF_MAX_DISKS)
3824 return (ENOMEM);
3825 config->nspares = raidPtr->numSpare;
3826 if (config->nspares >= RF_MAX_DISKS)
3827 return (ENOMEM);
3828 config->maxqdepth = raidPtr->maxQueueDepth;
3829 d = 0;
3830 for (j = 0; j < config->cols; j++) {
3831 config->devs[d] = raidPtr->Disks[j];
3832 d++;
3833 }
3834 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3835 config->spares[i] = raidPtr->Disks[j];
3836 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3837 /* XXX: raidctl(8) expects to see this as a used spare */
3838 config->spares[i].status = rf_ds_used_spare;
3839 }
3840 }
3841 return 0;
3842 }
3843
3844 int
3845 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3846 {
3847 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3848 RF_ComponentLabel_t *raid_clabel;
3849 int column = clabel->column;
3850
3851 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3852 return EINVAL;
3853 raid_clabel = raidget_component_label(raidPtr, column);
3854 memcpy(clabel, raid_clabel, sizeof *clabel);
3855
3856 return 0;
3857 }
3858
3859 /*
3860 * Module interface
3861 */
3862
/* Driver-class module; depends on the disk subroutines and FCFS bufq. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* When built as a loadable module we supply the cfdriver ourselves. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3872
3873 static int
3874 raid_modcmd(modcmd_t cmd, void *data)
3875 {
3876 int error;
3877
3878 error = 0;
3879 switch (cmd) {
3880 case MODULE_CMD_INIT:
3881 error = raid_modcmd_init();
3882 break;
3883 case MODULE_CMD_FINI:
3884 error = raid_modcmd_fini();
3885 break;
3886 default:
3887 error = ENOTTY;
3888 break;
3889 }
3890 return error;
3891 }
3892
/*
 * Module initialization: create the global raid_lock, attach the
 * device switch and autoconf glue (unwinding prior steps on failure),
 * boot the RAIDframe core, and register the autoconfig finalizer.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to pick the majors dynamically. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST means the devsw is already present (built-in); OK. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Unwind: detach the devsw attached above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind in reverse order of attachment. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 here (all failure paths returned above). */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: we can run without autoconfiguration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3963
/*
 * Module teardown: refuse while any raid device exists, then detach
 * the autoconf glue and devsw in reverse order of attachment,
 * re-attaching earlier pieces if a later detach fails, and finally
 * shut down the RAIDframe core and destroy the global lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: restore the cfattach detached above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back: restore cfdriver and cfattach. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4013