rf_netbsdkintf.c revision 1.356 1 /* $NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #ifdef COMPAT_50
153 #include "rf_compat50.h"
154 #endif
155
156 #ifdef COMPAT_80
157 #include "rf_compat80.h"
158 #endif
159
160 #ifdef COMPAT_NETBSD32
161 #include "rf_compat32.h"
162 #endif
163
164 #include "ioconf.h"
165
166 #ifdef DEBUG
167 int rf_kdebug_level = 0;
168 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
169 #else /* DEBUG */
170 #define db1_printf(a) { }
171 #endif /* DEBUG */
172
173 #ifdef DEBUG_ROOT
174 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
175 #else
176 #define DPRINTF(a, ...)
177 #endif
178
179 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
180 static rf_declare_mutex2(rf_sparet_wait_mutex);
181 static rf_declare_cond2(rf_sparet_wait_cv);
182 static rf_declare_cond2(rf_sparet_resp_cv);
183
184 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
185 * spare table */
186 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
187 * installation process */
188 #endif
189
190 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
191
192 /* prototypes */
193 static void KernelWakeupFunc(struct buf *);
194 static void InitBP(struct buf *, struct vnode *, unsigned,
195 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
196 void *, int, struct proc *);
197 struct raid_softc;
198 static void raidinit(struct raid_softc *);
199 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
200 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
201
202 static int raid_match(device_t, cfdata_t, void *);
203 static void raid_attach(device_t, device_t, void *);
204 static int raid_detach(device_t, int);
205
206 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
207 daddr_t, daddr_t);
208 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
209 daddr_t, daddr_t, int);
210
211 static int raidwrite_component_label(unsigned,
212 dev_t, struct vnode *, RF_ComponentLabel_t *);
213 static int raidread_component_label(unsigned,
214 dev_t, struct vnode *, RF_ComponentLabel_t *);
215
216 static int raid_diskstart(device_t, struct buf *bp);
217 static int raid_dumpblocks(device_t, void *, daddr_t, int);
218 static int raid_lastclose(device_t);
219
220 static dev_type_open(raidopen);
221 static dev_type_close(raidclose);
222 static dev_type_read(raidread);
223 static dev_type_write(raidwrite);
224 static dev_type_ioctl(raidioctl);
225 static dev_type_strategy(raidstrategy);
226 static dev_type_dump(raiddump);
227 static dev_type_size(raidsize);
228
229 const struct bdevsw raid_bdevsw = {
230 .d_open = raidopen,
231 .d_close = raidclose,
232 .d_strategy = raidstrategy,
233 .d_ioctl = raidioctl,
234 .d_dump = raiddump,
235 .d_psize = raidsize,
236 .d_discard = nodiscard,
237 .d_flag = D_DISK
238 };
239
240 const struct cdevsw raid_cdevsw = {
241 .d_open = raidopen,
242 .d_close = raidclose,
243 .d_read = raidread,
244 .d_write = raidwrite,
245 .d_ioctl = raidioctl,
246 .d_stop = nostop,
247 .d_tty = notty,
248 .d_poll = nopoll,
249 .d_mmap = nommap,
250 .d_kqfilter = nokqfilter,
251 .d_discard = nodiscard,
252 .d_flag = D_DISK
253 };
254
255 static struct dkdriver rf_dkdriver = {
256 .d_open = raidopen,
257 .d_close = raidclose,
258 .d_strategy = raidstrategy,
259 .d_diskstart = raid_diskstart,
260 .d_dumpblocks = raid_dumpblocks,
261 .d_lastclose = raid_lastclose,
262 .d_minphys = minphys
263 };
264
265 struct raid_softc {
266 struct dk_softc sc_dksc;
267 int sc_unit;
268 int sc_flags; /* flags */
269 int sc_cflags; /* configuration flags */
270 kmutex_t sc_mutex; /* interlock mutex */
271 kcondvar_t sc_cv; /* and the condvar */
272 uint64_t sc_size; /* size of the raid device */
273 char sc_xname[20]; /* XXX external name */
274 RF_Raid_t sc_r;
275 LIST_ENTRY(raid_softc) sc_link;
276 };
277 /* sc_flags */
278 #define RAIDF_INITED 0x01 /* unit has been initialized */
279 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
280 #define RAIDF_DETACH 0x04 /* detach after final close */
281 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
282 #define RAIDF_LOCKED 0x10 /* unit is locked */
283 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
284
285 #define raidunit(x) DISKUNIT(x)
286 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
287
288 extern struct cfdriver raid_cd;
289 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
290 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
291 DVF_DETACH_SHUTDOWN);
292
293 /* Internal representation of a rf_recon_req */
294 struct rf_recon_req_internal {
295 RF_RowCol_t col;
296 RF_ReconReqFlags_t flags;
297 void *raidPtr;
298 };
299
300 /*
301 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
302 * Be aware that large numbers can allow the driver to consume a lot of
303 * kernel memory, especially on writes, and in degraded mode reads.
304 *
305 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
306 * a single 64K write will typically require 64K for the old data,
307 * 64K for the old parity, and 64K for the new parity, for a total
308 * of 192K (if the parity buffer is not re-used immediately).
309 * Even it if is used immediately, that's still 128K, which when multiplied
310 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
311 *
312 * Now in degraded mode, for example, a 64K read on the above setup may
313 * require data reconstruction, which will require *all* of the 4 remaining
314 * disks to participate -- 4 * 32K/disk == 128K again.
315 */
316
317 #ifndef RAIDOUTSTANDING
318 #define RAIDOUTSTANDING 6
319 #endif
320
321 #define RAIDLABELDEV(dev) \
322 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
323
324 /* declared here, and made public, for the benefit of KVM stuff.. */
325
326 static int raidlock(struct raid_softc *);
327 static void raidunlock(struct raid_softc *);
328
329 static int raid_detach_unlocked(struct raid_softc *);
330
331 static void rf_markalldirty(RF_Raid_t *);
332 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
333
334 void rf_ReconThread(struct rf_recon_req_internal *);
335 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
336 void rf_CopybackThread(RF_Raid_t *raidPtr);
337 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
338 int rf_autoconfig(device_t);
339 void rf_buildroothack(RF_ConfigSet_t *);
340
341 RF_AutoConfig_t *rf_find_raid_components(void);
342 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
343 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
344 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
345 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
346 int rf_set_autoconfig(RF_Raid_t *, int);
347 int rf_set_rootpartition(RF_Raid_t *, int);
348 void rf_release_all_vps(RF_ConfigSet_t *);
349 void rf_cleanup_config_set(RF_ConfigSet_t *);
350 int rf_have_enough_components(RF_ConfigSet_t *);
351 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
352 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
353
354 /*
355 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
356 * Note that this is overridden by having RAID_AUTOCONFIG as an option
357 * in the kernel config file.
358 */
359 #ifdef RAID_AUTOCONFIG
360 int raidautoconfig = 1;
361 #else
362 int raidautoconfig = 0;
363 #endif
364 static bool raidautoconfigdone = false;
365
366 struct RF_Pools_s rf_pools;
367
368 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
369 static kmutex_t raid_lock;
370
371 static struct raid_softc *
372 raidcreate(int unit) {
373 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
374 sc->sc_unit = unit;
375 cv_init(&sc->sc_cv, "raidunit");
376 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
377 return sc;
378 }
379
380 static void
381 raiddestroy(struct raid_softc *sc) {
382 cv_destroy(&sc->sc_cv);
383 mutex_destroy(&sc->sc_mutex);
384 kmem_free(sc, sizeof(*sc));
385 }
386
387 static struct raid_softc *
388 raidget(int unit, bool create) {
389 struct raid_softc *sc;
390 if (unit < 0) {
391 #ifdef DIAGNOSTIC
392 panic("%s: unit %d!", __func__, unit);
393 #endif
394 return NULL;
395 }
396 mutex_enter(&raid_lock);
397 LIST_FOREACH(sc, &raids, sc_link) {
398 if (sc->sc_unit == unit) {
399 mutex_exit(&raid_lock);
400 return sc;
401 }
402 }
403 mutex_exit(&raid_lock);
404 if (!create)
405 return NULL;
406 if ((sc = raidcreate(unit)) == NULL)
407 return NULL;
408 mutex_enter(&raid_lock);
409 LIST_INSERT_HEAD(&raids, sc, sc_link);
410 mutex_exit(&raid_lock);
411 return sc;
412 }
413
/*
 * Unlink a raid_softc from the global "raids" list and free it.  The
 * caller must guarantee that no other references to the softc remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
421
/*
 * Legacy pseudo-device attach entry point; "num" (the requested number
 * of units) is ignored.  Kept only because autoconf expects it.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
431
/*
 * Scan all disks for RAIDframe component labels and auto-configure
 * the sets found.  Runs at most once (guarded by raidautoconfigdone)
 * and only when autoconfiguration is enabled (raidautoconfig).
 *
 * Returns 1 when a scan was performed, 0 when it was skipped.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
469
470 static int
471 rf_containsboot(RF_Raid_t *r, device_t bdv) {
472 const char *bootname = device_xname(bdv);
473 size_t len = strlen(bootname);
474
475 for (int col = 0; col < r->numCol; col++) {
476 const char *devname = r->Disks[col].devname;
477 devname += sizeof("/dev/") - 1;
478 if (strncmp(devname, "dk", 2) == 0) {
479 const char *parent =
480 dkwedge_get_parent_name(r->Disks[col].dev);
481 if (parent != NULL)
482 devname = parent;
483 }
484 if (strncmp(devname, bootname, len) == 0) {
485 struct raid_softc *sc = r->softc;
486 aprint_debug("raid%d includes boot device %s\n",
487 sc->sc_unit, devname);
488 return 1;
489 }
490 }
491 return 0;
492 }
493
494 void
495 rf_buildroothack(RF_ConfigSet_t *config_sets)
496 {
497 RF_ConfigSet_t *cset;
498 RF_ConfigSet_t *next_cset;
499 int num_root;
500 struct raid_softc *sc, *rsc;
501 struct dk_softc *dksc;
502
503 sc = rsc = NULL;
504 num_root = 0;
505 cset = config_sets;
506 while (cset != NULL) {
507 next_cset = cset->next;
508 if (rf_have_enough_components(cset) &&
509 cset->ac->clabel->autoconfigure == 1) {
510 sc = rf_auto_config_set(cset);
511 if (sc != NULL) {
512 aprint_debug("raid%d: configured ok\n",
513 sc->sc_unit);
514 if (cset->rootable) {
515 rsc = sc;
516 num_root++;
517 }
518 } else {
519 /* The autoconfig didn't work :( */
520 aprint_debug("Autoconfig failed\n");
521 rf_release_all_vps(cset);
522 }
523 } else {
524 /* we're not autoconfiguring this set...
525 release the associated resources */
526 rf_release_all_vps(cset);
527 }
528 /* cleanup */
529 rf_cleanup_config_set(cset);
530 cset = next_cset;
531 }
532 dksc = &rsc->sc_dksc;
533
534 /* if the user has specified what the root device should be
535 then we don't touch booted_device or boothowto... */
536
537 if (rootspec != NULL)
538 return;
539
540 /* we found something bootable... */
541
542 /*
543 * XXX: The following code assumes that the root raid
544 * is the first ('a') partition. This is about the best
545 * we can do with a BSD disklabel, but we might be able
546 * to do better with a GPT label, by setting a specified
547 * attribute to indicate the root partition. We can then
548 * stash the partition number in the r->root_partition
549 * high bits (the bottom 2 bits are already used). For
550 * now we just set booted_partition to 0 when we override
551 * root.
552 */
553 if (num_root == 1) {
554 device_t candidate_root;
555 if (dksc->sc_dkdev.dk_nwedges != 0) {
556 char cname[sizeof(cset->ac->devname)];
557 /* XXX: assume partition 'a' first */
558 snprintf(cname, sizeof(cname), "%s%c",
559 device_xname(dksc->sc_dev), 'a');
560 candidate_root = dkwedge_find_by_wname(cname);
561 DPRINTF("%s: candidate wedge root=%s\n", __func__,
562 cname);
563 if (candidate_root == NULL) {
564 /*
565 * If that is not found, because we don't use
566 * disklabel, return the first dk child
567 * XXX: we can skip the 'a' check above
568 * and always do this...
569 */
570 size_t i = 0;
571 candidate_root = dkwedge_find_by_parent(
572 device_xname(dksc->sc_dev), &i);
573 }
574 DPRINTF("%s: candidate wedge root=%p\n", __func__,
575 candidate_root);
576 } else
577 candidate_root = dksc->sc_dev;
578 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
579 DPRINTF("%s: booted_device=%p root_partition=%d "
580 "contains_boot=%d\n", __func__, booted_device,
581 rsc->sc_r.root_partition,
582 rf_containsboot(&rsc->sc_r, booted_device));
583 if (booted_device == NULL ||
584 rsc->sc_r.root_partition == 1 ||
585 rf_containsboot(&rsc->sc_r, booted_device)) {
586 booted_device = candidate_root;
587 booted_method = "raidframe/single";
588 booted_partition = 0; /* XXX assume 'a' */
589 }
590 } else if (num_root > 1) {
591 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
592 booted_device);
593
594 /*
595 * Maybe the MD code can help. If it cannot, then
596 * setroot() will discover that we have no
597 * booted_device and will ask the user if nothing was
598 * hardwired in the kernel config file
599 */
600 if (booted_device == NULL)
601 return;
602
603 num_root = 0;
604 mutex_enter(&raid_lock);
605 LIST_FOREACH(sc, &raids, sc_link) {
606 RF_Raid_t *r = &sc->sc_r;
607 if (r->valid == 0)
608 continue;
609
610 if (r->root_partition == 0)
611 continue;
612
613 if (rf_containsboot(r, booted_device)) {
614 num_root++;
615 rsc = sc;
616 dksc = &rsc->sc_dksc;
617 }
618 }
619 mutex_exit(&raid_lock);
620
621 if (num_root == 1) {
622 booted_device = dksc->sc_dev;
623 booted_method = "raidframe/multi";
624 booted_partition = 0; /* XXX assume 'a' */
625 } else {
626 /* we can't guess.. require the user to answer... */
627 boothowto |= RB_ASKNAME;
628 }
629 }
630 }
631
632 static int
633 raidsize(dev_t dev)
634 {
635 struct raid_softc *rs;
636 struct dk_softc *dksc;
637 unsigned int unit;
638
639 unit = raidunit(dev);
640 if ((rs = raidget(unit, false)) == NULL)
641 return -1;
642 dksc = &rs->sc_dksc;
643
644 if ((rs->sc_flags & RAIDF_INITED) == 0)
645 return -1;
646
647 return dk_size(dksc, dev);
648 }
649
650 static int
651 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
652 {
653 unsigned int unit;
654 struct raid_softc *rs;
655 struct dk_softc *dksc;
656
657 unit = raidunit(dev);
658 if ((rs = raidget(unit, false)) == NULL)
659 return ENXIO;
660 dksc = &rs->sc_dksc;
661
662 if ((rs->sc_flags & RAIDF_INITED) == 0)
663 return ENODEV;
664
665 /*
666 Note that blkno is relative to this particular partition.
667 By adding adding RF_PROTECTED_SECTORS, we get a value that
668 is relative to the partition used for the underlying component.
669 */
670 blkno += RF_PROTECTED_SECTORS;
671
672 return dk_dump(dksc, dev, blkno, va, size);
673 }
674
/*
 * Write "nblk" blocks at "blkno" straight to one live component of the
 * set, bypassing RAIDframe proper (we may be crashing).  Only RAID 1
 * shapes (one data + one parity column) are supported, since there
 * each component holds a complete copy of the data.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to. The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave. We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far. (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.) On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave. We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Dump directly through the component's block device driver. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
780
/*
 * Open the raid device.  The softc is created on first open (raidget
 * with create=true) so that an as-yet-unconfigured unit can still be
 * opened and then configured via ioctl; dk_open() is only invoked for
 * units that are already configured.
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Unit is being torn down; refuse new opens. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	/* Only a configured unit reaches the dk(9) layer; an
	   unconfigured one "opens" successfully with error == 0. */
	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
830
/*
 * dk(9) last-close hook, invoked when the final partition of the unit
 * is closed.  Writes final ("clean") component labels and, when a
 * shutdown was requested, flags the unit so raidclose() detaches the
 * pseudo-device once the unit lock is dropped.
 */
static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}
850
/*
 * Close the raid device.  dk_close() runs raid_lastclose() on the
 * final close, which may set RAIDF_DETACH; the actual config_detach()
 * must happen here, after the unit lock has been released.  A unit
 * that was never configured but is marked for shutdown is simply
 * removed from the list and freed.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have requested a detach. */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* Detach/free outside the unit lock. */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
890
/*
 * Wake whatever is sleeping on iodone_cv (the RAIDframe I/O service
 * thread) so it re-examines the queued work.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
898
899 static void
900 raidstrategy(struct buf *bp)
901 {
902 unsigned int unit;
903 struct raid_softc *rs;
904 struct dk_softc *dksc;
905 RF_Raid_t *raidPtr;
906
907 unit = raidunit(bp->b_dev);
908 if ((rs = raidget(unit, false)) == NULL) {
909 bp->b_error = ENXIO;
910 goto fail;
911 }
912 if ((rs->sc_flags & RAIDF_INITED) == 0) {
913 bp->b_error = ENXIO;
914 goto fail;
915 }
916 dksc = &rs->sc_dksc;
917 raidPtr = &rs->sc_r;
918
919 /* Queue IO only */
920 if (dk_strategy_defer(dksc, bp))
921 goto done;
922
923 /* schedule the IO to happen at the next convenient time */
924 raid_wakeup(raidPtr);
925
926 done:
927 return;
928
929 fail:
930 bp->b_resid = bp->b_bcount;
931 biodone(bp);
932 }
933
934 static int
935 raid_diskstart(device_t dev, struct buf *bp)
936 {
937 struct raid_softc *rs = raidsoftc(dev);
938 RF_Raid_t *raidPtr;
939
940 raidPtr = &rs->sc_r;
941 if (!raidPtr->valid) {
942 db1_printf(("raid is not valid..\n"));
943 return ENODEV;
944 }
945
946 /* XXX */
947 bp->b_resid = 0;
948
949 return raiddoaccess(raidPtr, bp);
950 }
951
952 void
953 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
954 {
955 struct raid_softc *rs;
956 struct dk_softc *dksc;
957
958 rs = raidPtr->softc;
959 dksc = &rs->sc_dksc;
960
961 dk_done(dksc, bp);
962
963 rf_lock_mutex2(raidPtr->mutex);
964 raidPtr->openings++;
965 rf_unlock_mutex2(raidPtr->mutex);
966
967 /* schedule more IO */
968 raid_wakeup(raidPtr);
969 }
970
971 /* ARGSUSED */
972 static int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit, false)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987
988 /* ARGSUSED */
989 static int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 int unit = raidunit(dev);
993 struct raid_softc *rs;
994
995 if ((rs = raidget(unit, false)) == NULL)
996 return ENXIO;
997
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000
1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002
1003 }
1004
/*
 * Shut down a configured RAID set and tear down its dk(9)/disk(9)
 * state.  Fails with EBUSY while any partition is open or while
 * reconstruction, parity rewrite or copyback is in progress.
 * NOTE(review): the "_unlocked" suffix suggests the caller already
 * holds the unit lock — confirm against raid_detach()/raidioctl().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while busy: open users or background operations. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Nothing to unconfigure. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1042
/*
 * raidioctl: ioctl(2) entry point for the raid(4) pseudo-device.
 *
 * dev   - device being operated on (unit and partition are decoded from it)
 * cmd   - RAIDFRAME_* control command or a generular disk ioctl
 * data  - in/out argument buffer, already copied in by the ioctl layer
 * flag  - open flags (unused here except by dk_ioctl)
 * l     - calling lwp; used for COMPAT_NETBSD32 process-flag checks and
 *         passed through to dk_ioctl
 *
 * Returns 0 on success or an errno.  RAIDFRAME commands are handled in
 * the big switch below; anything unrecognized falls through to the
 * common disk ioctl handler (dk_ioctl) at the bottom.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;	/* kernel / user copies of the config */
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;		/* layout-specific config payload */
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	/*
	 * Commands in this first switch require an already-configured
	 * (RAIDF_INITED) array; reject them with ENXIO otherwise.
	 */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
#endif
#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
	case RAIDFRAME_GET_INFO80:
	case RAIDFRAME_GET_COMPONENT_LABEL80:
#endif
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* Old-format config: converted to k_cfg then shares the
		 * common "config" path below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
		return rf_check_recon_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
		return rf_check_parityrewrite_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
		return rf_check_copyback_status_ext80(raidPtr, data);

	case RAIDFRAME_GET_INFO80:
		return rf_get_info80(raidPtr, data);

	case RAIDFRAME_GET_COMPONENT_LABEL80:
		return rf_get_component_label80(raidPtr, data);

	case RAIDFRAME_CONFIGURE80:
		if ((retcode = rf_config80(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_CONFIGURE32:
#endif
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef COMPAT_NETBSD32
#ifdef _LP64
		/* 32-bit process on a 64-bit kernel: translate the config. */
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			retcode = rf_config_netbsd32(data, k_cfg);
		else
#endif
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/*
		 * Common configuration path; reached with k_cfg holding a
		 * kernel copy of the user's RF_Config_t (native or compat).
		 */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse to shut down while the unit is busy or any
		 * background operation (recon/parity/copyback) runs. */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		/* Write a freshly initialized label to every live column. */
		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): unimplemented -- returns 0 without doing
		 * anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Validate the target column's state under the array lock
		 * before kicking off the rebuild thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* Request block is freed by the reconstruction thread. */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			/* Destination pointer differs for 32-bit callers. */
#ifdef COMPAT_NETBSD32
#ifdef _LP64
			if (cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		/* NOTE(review): totalSectors is narrowed to int here; a
		 * very large set could overflow -- confirm against
		 * raidctl's expectations. */
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:
#ifdef COMPAT_80
	case RAIDFRAME_FAIL_DISK80:
#endif

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		/* NOTE(review): thread-creation status in retcode is
		 * discarded; 0 is returned unconditionally -- confirm
		 * intentional. */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* Only allowed when every component is optimal; then stamp
		 * the new unit number into every component label. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		/* Anything else is handled by the common disk layer. */
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1790
1791
1792 /* raidinit -- complete the rest of the initialization for the
1793 RAIDframe device. */
1794
1795
1796 static void
1797 raidinit(struct raid_softc *rs)
1798 {
1799 cfdata_t cf;
1800 unsigned int unit;
1801 struct dk_softc *dksc = &rs->sc_dksc;
1802 RF_Raid_t *raidPtr = &rs->sc_r;
1803 device_t dev;
1804
1805 unit = raidPtr->raidid;
1806
1807 /* XXX doesn't check bounds. */
1808 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1809
1810 /* attach the pseudo device */
1811 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1812 cf->cf_name = raid_cd.cd_name;
1813 cf->cf_atname = raid_cd.cd_name;
1814 cf->cf_unit = unit;
1815 cf->cf_fstate = FSTATE_STAR;
1816
1817 dev = config_attach_pseudo(cf);
1818 if (dev == NULL) {
1819 printf("raid%d: config_attach_pseudo failed\n",
1820 raidPtr->raidid);
1821 free(cf, M_RAIDFRAME);
1822 return;
1823 }
1824
1825 /* provide a backpointer to the real softc */
1826 raidsoftc(dev) = rs;
1827
1828 /* disk_attach actually creates space for the CPU disklabel, among
1829 * other things, so it's critical to call this *BEFORE* we try putzing
1830 * with disklabels. */
1831 dk_init(dksc, dev, DKTYPE_RAID);
1832 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1833
1834 /* XXX There may be a weird interaction here between this, and
1835 * protectedSectors, as used in RAIDframe. */
1836
1837 rs->sc_size = raidPtr->totalSectors;
1838
1839 /* Attach dk and disk subsystems */
1840 dk_attach(dksc);
1841 disk_attach(&dksc->sc_dkdev);
1842 rf_set_geometry(rs, raidPtr);
1843
1844 bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
1845
1846 /* mark unit as usuable */
1847 rs->sc_flags |= RAIDF_INITED;
1848
1849 dkwedge_discover(&dksc->sc_dkdev);
1850 }
1851
1852 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1853 /* wake up the daemon & tell it to get us a spare table
1854 * XXX
1855 * the entries in the queues should be tagged with the raidPtr
1856 * so that in the extremely rare case that two recons happen at once,
1857 * we know for which device were requesting a spare table
1858 * XXX
1859 *
1860 * XXX This code is not currently used. GO
1861 */
1862 int
1863 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1864 {
1865 int retcode;
1866
1867 rf_lock_mutex2(rf_sparet_wait_mutex);
1868 req->next = rf_sparet_wait_queue;
1869 rf_sparet_wait_queue = req;
1870 rf_broadcast_cond2(rf_sparet_wait_cv);
1871
1872 /* mpsleep unlocks the mutex */
1873 while (!rf_sparet_resp_queue) {
1874 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1875 }
1876 req = rf_sparet_resp_queue;
1877 rf_sparet_resp_queue = req->next;
1878 rf_unlock_mutex2(rf_sparet_wait_mutex);
1879
1880 retcode = req->fcol;
1881 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1882 * alloc'd */
1883 return (retcode);
1884 }
1885 #endif
1886
1887 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1888 * bp & passes it down.
1889 * any calls originating in the kernel must use non-blocking I/O
1890 * do some extra sanity checking to return "appropriate" error values for
1891 * certain conditions (to make some standard utilities work)
1892 *
1893 * Formerly known as: rf_DoAccessKernel
1894 */
1895 void
1896 raidstart(RF_Raid_t *raidPtr)
1897 {
1898 struct raid_softc *rs;
1899 struct dk_softc *dksc;
1900
1901 rs = raidPtr->softc;
1902 dksc = &rs->sc_dksc;
1903 /* quick check to see if anything has died recently */
1904 rf_lock_mutex2(raidPtr->mutex);
1905 if (raidPtr->numNewFailures > 0) {
1906 rf_unlock_mutex2(raidPtr->mutex);
1907 rf_update_component_labels(raidPtr,
1908 RF_NORMAL_COMPONENT_UPDATE);
1909 rf_lock_mutex2(raidPtr->mutex);
1910 raidPtr->numNewFailures--;
1911 }
1912 rf_unlock_mutex2(raidPtr->mutex);
1913
1914 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1915 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1916 return;
1917 }
1918
1919 dk_start(dksc, NULL);
1920 }
1921
/*
 * raiddoaccess: translate a struct buf into a RAIDframe access and hand
 * it to rf_DoAccess() as a non-blocking (async) request.
 *
 * Returns 0 when the access was dispatched, EAGAIN when the array has no
 * openings left (caller should retry later), or ENOSPC when the request
 * falls outside the array or is not sector-aligned.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* Throttle: bail out if all concurrent-I/O slots are in use. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* Whole sectors requested, plus one partial if bcount isn't an
	 * exact multiple of the sector size. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Reject accesses past the end of the array; the (sum < x)
	 * comparisons also catch unsigned wrap-around in the addition. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that aren't a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening; released again when the access completes. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1994
1995 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1996
/*
 * rf_DispatchKernelIO: issue one RAIDframe disk-queue request to the
 * underlying component device.  Disk queue should be locked upon entry.
 *
 * RF_IO_TYPE_NOP requests are completed immediately via the wakeup
 * callback; reads and writes are turned into a struct buf and sent down
 * with bdev_strategy().  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately through the normal
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp to target the component's vnode/device at the
		 * requested sector range; completion calls KernelWakeupFunc. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2070 /* this is the callback function associated with a I/O invoked from
2071 kernel code.
2072 */
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone time: records tracing stats, marks the component
   failed on I/O error (only if the set can tolerate another failure),
   then moves the request to the raidPtr->iodone queue and signals the
   raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a component-label update
			 * the next time raidstart() runs. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2139
2140
2141 /*
2142 * initialize a buf structure for doing an I/O in the kernel.
2143 */
2144 static void
2145 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2146 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2147 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2148 struct proc *b_proc)
2149 {
2150 /* bp->b_flags = B_PHYS | rw_flag; */
2151 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2152 bp->b_oflags = 0;
2153 bp->b_cflags = 0;
2154 bp->b_bcount = numSect << logBytesPerSector;
2155 bp->b_bufsize = bp->b_bcount;
2156 bp->b_error = 0;
2157 bp->b_dev = dev;
2158 bp->b_data = bf;
2159 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2160 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2161 if (bp->b_bcount == 0) {
2162 panic("bp->b_bcount is zero in InitBP!!");
2163 }
2164 bp->b_proc = b_proc;
2165 bp->b_iodone = cbFunc;
2166 bp->b_private = cbArg;
2167 }
2168
2169 /*
2170 * Wait interruptibly for an exclusive lock.
2171 *
2172 * XXX
2173 * Several drivers do this; it should be abstracted and made MP-safe.
2174 * (Hmm... where have we seen this warning before :-> GO )
2175 */
2176 static int
2177 raidlock(struct raid_softc *rs)
2178 {
2179 int error;
2180
2181 error = 0;
2182 mutex_enter(&rs->sc_mutex);
2183 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2184 rs->sc_flags |= RAIDF_WANTED;
2185 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2186 if (error != 0)
2187 goto done;
2188 }
2189 rs->sc_flags |= RAIDF_LOCKED;
2190 done:
2191 mutex_exit(&rs->sc_mutex);
2192 return (error);
2193 }
2194 /*
2195 * Unlock and wake up any waiters.
2196 */
2197 static void
2198 raidunlock(struct raid_softc *rs)
2199 {
2200
2201 mutex_enter(&rs->sc_mutex);
2202 rs->sc_flags &= ~RAIDF_LOCKED;
2203 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2204 rs->sc_flags &= ~RAIDF_WANTED;
2205 cv_broadcast(&rs->sc_cv);
2206 }
2207 mutex_exit(&rs->sc_mutex);
2208 }
2209
2210
2211 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2212 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2213 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2214
/*
 * Byte offset of the component label area on each component device.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2221
2222 static daddr_t
2223 rf_component_info_size(unsigned secsize)
2224 {
2225 daddr_t info_size;
2226
2227 KASSERT(secsize);
2228 if (secsize > RF_COMPONENT_INFO_SIZE)
2229 info_size = secsize;
2230 else
2231 info_size = RF_COMPONENT_INFO_SIZE;
2232
2233 return info_size;
2234 }
2235
2236 static daddr_t
2237 rf_parity_map_offset(RF_Raid_t *raidPtr)
2238 {
2239 daddr_t map_offset;
2240
2241 KASSERT(raidPtr->bytesPerSector);
2242 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2243 map_offset = raidPtr->bytesPerSector;
2244 else
2245 map_offset = RF_COMPONENT_INFO_SIZE;
2246 map_offset += rf_component_info_offset();
2247
2248 return map_offset;
2249 }
2250
2251 static daddr_t
2252 rf_parity_map_size(RF_Raid_t *raidPtr)
2253 {
2254 daddr_t map_size;
2255
2256 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2257 map_size = raidPtr->bytesPerSector;
2258 else
2259 map_size = RF_PARITY_MAP_SIZE;
2260
2261 return map_size;
2262 }
2263
2264 int
2265 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2266 {
2267 RF_ComponentLabel_t *clabel;
2268
2269 clabel = raidget_component_label(raidPtr, col);
2270 clabel->clean = RF_RAID_CLEAN;
2271 raidflush_component_label(raidPtr, col);
2272 return(0);
2273 }
2274
2275
2276 int
2277 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2278 {
2279 RF_ComponentLabel_t *clabel;
2280
2281 clabel = raidget_component_label(raidPtr, col);
2282 clabel->clean = RF_RAID_DIRTY;
2283 raidflush_component_label(raidPtr, col);
2284 return(0);
2285 }
2286
/*
 * Re-read the on-disk component label of column `col' into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns 0 or an I/O error.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2296
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers modify it in place and then push it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2302
2303 int
2304 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2305 {
2306 RF_ComponentLabel_t *label;
2307
2308 label = &raidPtr->raid_cinfo[col].ci_label;
2309 label->mod_counter = raidPtr->mod_counter;
2310 #ifndef RF_NO_PARITY_MAP
2311 label->parity_map_modcount = label->mod_counter;
2312 #endif
2313 return raidwrite_component_label(raidPtr->bytesPerSector,
2314 raidPtr->Disks[col].dev,
2315 raidPtr->raid_cinfo[col].ci_vp, label);
2316 }
2317
2318
/*
 * Read the component label from `dev' into *clabel, using the standard
 * label offset and size for the given sector size.  Synchronous.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2328
/*
 * Synchronously read `dsize' bytes starting at byte offset `offset' on
 * the raw component device `dev', and copy the first `msize' bytes of
 * the result into `data'.  Returns 0 on success or an error from
 * biowait().
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* Issue the I/O and wait for completion. */
	bdev_strategy(bp);
	error = biowait(bp);

	/* Only the first msize bytes are meaningful to the caller;
	   the rest of the (sector-rounded) buffer is scratch. */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2366
2367
/*
 * Write *clabel to the component label area of `dev', using the
 * standard label offset and size for the given sector size.
 * Synchronous (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2377
/*
 * Write `msize' bytes from `data' into a zero-padded `dsize'-byte area
 * starting at byte offset `offset' on the raw component device `dev'.
 * If `asyncp' is nonzero the write is issued B_ASYNC and 0 is returned
 * immediately; otherwise the result of biowait() is returned.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* Zero-fill first so the tail of the (sector-rounded) area is
	   deterministic, then copy in the caller's payload. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	/* NOTE(review): in the async case we return without brelse()
	   and without ever checking the I/O status -- presumably the
	   B_ASYNC buffer is released at biodone time.  Confirm before
	   relying on this path. */
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2412
2413 void
2414 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2415 {
2416 int c;
2417
2418 for (c = 0; c < raidPtr->numCol; c++) {
2419 /* Skip dead disks. */
2420 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2421 continue;
2422 /* XXXjld: what if an error occurs here? */
2423 raidwrite_component_area(raidPtr->Disks[c].dev,
2424 raidPtr->raid_cinfo[c].ci_vp, map,
2425 RF_PARITYMAP_NBYTE,
2426 rf_parity_map_offset(raidPtr),
2427 rf_parity_map_size(raidPtr), 0);
2428 }
2429 }
2430
2431 void
2432 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2433 {
2434 struct rf_paritymap_ondisk tmp;
2435 int c,first;
2436
2437 first=1;
2438 for (c = 0; c < raidPtr->numCol; c++) {
2439 /* Skip dead disks. */
2440 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2441 continue;
2442 raidread_component_area(raidPtr->Disks[c].dev,
2443 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2444 RF_PARITYMAP_NBYTE,
2445 rf_parity_map_offset(raidPtr),
2446 rf_parity_map_size(raidPtr));
2447 if (first) {
2448 memcpy(map, &tmp, sizeof(*map));
2449 first = 0;
2450 } else {
2451 rf_paritymap_merge(map, &tmp);
2452 }
2453 }
2454 }
2455
/*
 * Bump the set's modification counter and mark the component label of
 * every live component (and every in-use spare) dirty on disk.  Called
 * when the set goes "in use" so an unclean shutdown is detectable.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare stands in for.
			   NOTE(review): if no column maps to this spare,
			   scol keeps its previous value (-1 or a stale
			   match from an earlier iteration) -- confirm
			   that can't happen for an rf_ds_used_spare. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2515
2516
/*
 * Push updated component labels to every optimal component and every
 * in-use spare: bump the mod counter, record status and the configured
 * raid unit, and -- at final shutdown time (RF_FINAL_COMPONENT_UPDATE)
 * with good parity -- set the clean bit.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare stands in for.
			   NOTE(review): if no column maps to this spare,
			   scol keeps its previous value -- confirm this
			   can't happen for an rf_ds_used_spare. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2594
2595 void
2596 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2597 {
2598
2599 if (vp != NULL) {
2600 if (auto_configured == 1) {
2601 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2602 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2603 vput(vp);
2604
2605 } else {
2606 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2607 }
2608 }
2609 }
2610
2611
2612 void
2613 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2614 {
2615 int r,c;
2616 struct vnode *vp;
2617 int acd;
2618
2619
2620 /* We take this opportunity to close the vnodes like we should.. */
2621
2622 for (c = 0; c < raidPtr->numCol; c++) {
2623 vp = raidPtr->raid_cinfo[c].ci_vp;
2624 acd = raidPtr->Disks[c].auto_configured;
2625 rf_close_component(raidPtr, vp, acd);
2626 raidPtr->raid_cinfo[c].ci_vp = NULL;
2627 raidPtr->Disks[c].auto_configured = 0;
2628 }
2629
2630 for (r = 0; r < raidPtr->numSpare; r++) {
2631 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2632 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2633 rf_close_component(raidPtr, vp, acd);
2634 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2635 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2636 }
2637 }
2638
2639
/*
 * Kernel thread body: fail the component named in `req' and, if
 * RF_FDFLAGS_RECON is set, reconstruct its contents onto a spare.
 * Frees `req' and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* Block disk interrupts for the duration of the operation. */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
	    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2661
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * in-core parity state is marked clean (the on-disk clean bits are
 * written at shutdown).  Wakes any thread waiting on shutdown, then
 * exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2692
2693
/*
 * Kernel thread body: copy reconstructed data from a spare back to a
 * replaced component, then exit the thread.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2708
2709
/*
 * Kernel thread body: reconstruct column req->col in place (onto the
 * same physical component).  Frees `req' and exits the thread.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2727
/*
 * Try to read a component label from (dev, vp).  If the label is
 * present and plausible, prepend a new RF_AutoConfig_t for it to
 * ac_list and return the new list head (the vnode stays open and is
 * now owned by the list entry).  Otherwise close/release the vnode
 * and return ac_list unchanged.  On memory exhaustion the entire
 * accumulated list is freed and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything collected so far
		   (both the autoconfig entries and their labels). */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and close/release the vnode,
		   since no list entry took ownership of them. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2785
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t entries for the ones found.
 * Wedges are scanned in a first pass and everything else in a second,
 * so a wedge covering a whole disk wins over that disk's raw partition.
 * For non-wedge disks, each FS_RAID disklabel partition is checked;
 * if none is found, the raw partition itself is tried as a component.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			/* FSILENT: don't log an error for devices that
			   simply aren't there. */
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: only wedges whose partition
				   type is raidframe are candidates. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes over the vnode. */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2989
2990
2991 int
2992 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2993 {
2994
2995 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2996 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2997 ((clabel->clean == RF_RAID_CLEAN) ||
2998 (clabel->clean == RF_RAID_DIRTY)) &&
2999 clabel->row >=0 &&
3000 clabel->column >= 0 &&
3001 clabel->num_rows > 0 &&
3002 clabel->num_columns > 0 &&
3003 clabel->row < clabel->num_rows &&
3004 clabel->column < clabel->num_columns &&
3005 clabel->blockSize > 0 &&
3006 /*
3007 * numBlocksHi may contain garbage, but it is ok since
3008 * the type is unsigned. If it is really garbage,
3009 * rf_fix_old_label_size() will fix it.
3010 */
3011 rf_component_label_numblocks(clabel) > 0) {
3012 /*
3013 * label looks reasonable enough...
3014 * let's make sure it has no old garbage.
3015 */
3016 if (numsecs)
3017 rf_fix_old_label_size(clabel, numsecs);
3018 return(1);
3019 }
3020 return(0);
3021 }
3022
3023
3024 /*
3025 * For reasons yet unknown, some old component labels have garbage in
3026 * the newer numBlocksHi region, and this causes lossage. Since those
3027 * disks will also have numsecs set to less than 32 bits of sectors,
3028 * we can determine when this corruption has occurred, and fix it.
3029 *
3030 * The exact same problem, with the same unknown reason, happens to
3031 * the partitionSizeHi member as well.
3032 */
3033 static void
3034 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3035 {
3036
3037 if (numsecs < ((uint64_t)1 << 32)) {
3038 if (clabel->numBlocksHi) {
3039 printf("WARNING: total sectors < 32 bits, yet "
3040 "numBlocksHi set\n"
3041 "WARNING: resetting numBlocksHi to zero.\n");
3042 clabel->numBlocksHi = 0;
3043 }
3044
3045 if (clabel->partitionSizeHi) {
3046 printf("WARNING: total sectors < 32 bits, yet "
3047 "partitionSizeHi set\n"
3048 "WARNING: resetting partitionSizeHi to zero.\n");
3049 clabel->partitionSizeHi = 0;
3050 }
3051 }
3052 }
3053
3054
#ifdef DEBUG
/*
 * Pretty-print a component label to the console (DEBUG kernels only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Indexed by root_partition & 3; last slot catches bad values. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3088
3089 RF_ConfigSet_t *
3090 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3091 {
3092 RF_AutoConfig_t *ac;
3093 RF_ConfigSet_t *config_sets;
3094 RF_ConfigSet_t *cset;
3095 RF_AutoConfig_t *ac_next;
3096
3097
3098 config_sets = NULL;
3099
3100 /* Go through the AutoConfig list, and figure out which components
3101 belong to what sets. */
3102 ac = ac_list;
3103 while(ac!=NULL) {
3104 /* we're going to putz with ac->next, so save it here
3105 for use at the end of the loop */
3106 ac_next = ac->next;
3107
3108 if (config_sets == NULL) {
3109 /* will need at least this one... */
3110 config_sets = (RF_ConfigSet_t *)
3111 malloc(sizeof(RF_ConfigSet_t),
3112 M_RAIDFRAME, M_NOWAIT);
3113 if (config_sets == NULL) {
3114 panic("rf_create_auto_sets: No memory!");
3115 }
3116 /* this one is easy :) */
3117 config_sets->ac = ac;
3118 config_sets->next = NULL;
3119 config_sets->rootable = 0;
3120 ac->next = NULL;
3121 } else {
3122 /* which set does this component fit into? */
3123 cset = config_sets;
3124 while(cset!=NULL) {
3125 if (rf_does_it_fit(cset, ac)) {
3126 /* looks like it matches... */
3127 ac->next = cset->ac;
3128 cset->ac = ac;
3129 break;
3130 }
3131 cset = cset->next;
3132 }
3133 if (cset==NULL) {
3134 /* didn't find a match above... new set..*/
3135 cset = (RF_ConfigSet_t *)
3136 malloc(sizeof(RF_ConfigSet_t),
3137 M_RAIDFRAME, M_NOWAIT);
3138 if (cset == NULL) {
3139 panic("rf_create_auto_sets: No memory!");
3140 }
3141 cset->ac = ac;
3142 ac->next = NULL;
3143 cset->next = config_sets;
3144 cset->rootable = 0;
3145 config_sets = cset;
3146 }
3147 }
3148 ac = ac_next;
3149 }
3150
3151
3152 return(config_sets);
3153 }
3154
3155 static int
3156 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3157 {
3158 RF_ComponentLabel_t *clabel1, *clabel2;
3159
3160 /* If this one matches the *first* one in the set, that's good
3161 enough, since the other members of the set would have been
3162 through here too... */
3163 /* note that we are not checking partitionSize here..
3164
3165 Note that we are also not checking the mod_counters here.
3166 If everything else matches except the mod_counter, that's
3167 good enough for this test. We will deal with the mod_counters
3168 a little later in the autoconfiguration process.
3169
3170 (clabel1->mod_counter == clabel2->mod_counter) &&
3171
3172 The reason we don't check for this is that failed disks
3173 will have lower modification counts. If those disks are
3174 not added to the set they used to belong to, then they will
3175 form their own set, which may result in 2 different sets,
3176 for example, competing to be configured at raid0, and
3177 perhaps competing to be the root filesystem set. If the
3178 wrong ones get configured, or both attempt to become /,
3179 weird behaviour and or serious lossage will occur. Thus we
3180 need to bring them into the fold here, and kick them out at
3181 a later point.
3182
3183 */
3184
3185 clabel1 = cset->ac->clabel;
3186 clabel2 = ac->clabel;
3187 if ((clabel1->version == clabel2->version) &&
3188 (clabel1->serial_number == clabel2->serial_number) &&
3189 (clabel1->num_rows == clabel2->num_rows) &&
3190 (clabel1->num_columns == clabel2->num_columns) &&
3191 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3192 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3193 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3194 (clabel1->parityConfig == clabel2->parityConfig) &&
3195 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3196 (clabel1->blockSize == clabel2->blockSize) &&
3197 rf_component_label_numblocks(clabel1) ==
3198 rf_component_label_numblocks(clabel2) &&
3199 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3200 (clabel1->root_partition == clabel2->root_partition) &&
3201 (clabel1->last_unit == clabel2->last_unit) &&
3202 (clabel1->config_order == clabel2->config_order)) {
3203 /* if it get's here, it almost *has* to be a match */
3204 } else {
3205 /* it's not consistent with somebody in the set..
3206 punt */
3207 return(0);
3208 }
3209 /* all was fine.. it must fit... */
3210 return(1);
3211 }
3212
/*
 * Decide whether config set `cset' has enough live components (at the
 * highest mod_counter seen in the set) to be configured.  RAID 1 gets
 * special pairwise treatment: only the loss of *both* members of a
 * mirror pair is fatal.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (The highest counter wins: components with lower counters are
	   stale -- e.g. failed earlier -- and don't count as present.) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   for column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror pair
			   without bailing out above.. reset the
			   even_pair_failed flag, and go on to the next
			   pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3315
/*
 * Build an RF_Config_t from an autoconfig set.  The geometry comes
 * from the first component's label; the device-name table is filled
 * in from every component in the list (indexed by its label's column,
 * which rf_reasonable_label() has already range-checked).
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3349
3350 int
3351 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3352 {
3353 RF_ComponentLabel_t *clabel;
3354 int column;
3355 int sparecol;
3356
3357 raidPtr->autoconfigure = new_value;
3358
3359 for(column=0; column<raidPtr->numCol; column++) {
3360 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3361 clabel = raidget_component_label(raidPtr, column);
3362 clabel->autoconfigure = new_value;
3363 raidflush_component_label(raidPtr, column);
3364 }
3365 }
3366 for(column = 0; column < raidPtr->numSpare ; column++) {
3367 sparecol = raidPtr->numCol + column;
3368 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3369 clabel = raidget_component_label(raidPtr, sparecol);
3370 clabel->autoconfigure = new_value;
3371 raidflush_component_label(raidPtr, sparecol);
3372 }
3373 }
3374 return(new_value);
3375 }
3376
3377 int
3378 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3379 {
3380 RF_ComponentLabel_t *clabel;
3381 int column;
3382 int sparecol;
3383
3384 raidPtr->root_partition = new_value;
3385 for(column=0; column<raidPtr->numCol; column++) {
3386 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3387 clabel = raidget_component_label(raidPtr, column);
3388 clabel->root_partition = new_value;
3389 raidflush_component_label(raidPtr, column);
3390 }
3391 }
3392 for(column = 0; column < raidPtr->numSpare ; column++) {
3393 sparecol = raidPtr->numCol + column;
3394 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3395 clabel = raidget_component_label(raidPtr, sparecol);
3396 clabel->root_partition = new_value;
3397 raidflush_component_label(raidPtr, sparecol);
3398 }
3399 }
3400 return(new_value);
3401 }
3402
3403 void
3404 rf_release_all_vps(RF_ConfigSet_t *cset)
3405 {
3406 RF_AutoConfig_t *ac;
3407
3408 ac = cset->ac;
3409 while(ac!=NULL) {
3410 /* Close the vp, and give it back */
3411 if (ac->vp) {
3412 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3413 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3414 vput(ac->vp);
3415 ac->vp = NULL;
3416 }
3417 ac = ac->next;
3418 }
3419 }
3420
3421
3422 void
3423 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3424 {
3425 RF_AutoConfig_t *ac;
3426 RF_AutoConfig_t *next_ac;
3427
3428 ac = cset->ac;
3429 while(ac!=NULL) {
3430 next_ac = ac->next;
3431 /* nuke the label */
3432 free(ac->clabel, M_RAIDFRAME);
3433 /* cleanup the config structure */
3434 free(ac, M_RAIDFRAME);
3435 /* "next.." */
3436 ac = next_ac;
3437 }
3438 /* and, finally, nuke the config set */
3439 free(cset, M_RAIDFRAME);
3440 }
3441
3442
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	 * Populate *clabel with the current state of raidPtr so the label
	 * can be written to a component: identity (serial/mod counters),
	 * geometry (from the layout), and autoconfig/root flags.
	 */
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;	/* rows are a historical concept; always 1 */
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;	/* preferred unit on next autoconfig */
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3475
/*
 * Configure one auto-detected RAID set.  Builds an RF_Config_t from the
 * set's component labels, finds (or creates) a free raid unit, and runs
 * rf_Configure() on it.  Returns the configured softc, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/*
	 * Walk upward from the preferred unit until we find either a
	 * softc that is not yet configured (valid == 0) or a unit that
	 * does not exist at all (sc == NULL).
	 */
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	    sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* Unit didn't exist yet -- allocate it now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* Configuration succeeded: finish bringing up the unit. */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the softc we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3559
/*
 * Initialize a RAIDframe memory pool at IPL_BIO: set the high-water mark
 * to xmax, pre-allocate (prime) xmin items, and keep at least xmin items
 * available via the low-water mark.  Panics if priming fails.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	int error;

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	if ((error = pool_prime(p, xmin)) != 0)
		panic("%s: failed to prime pool: %d", __func__, error);
	pool_setlowat(p, xmin);
}
3572
3573 /*
3574 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3575 * to see if there is IO pending and if that IO could possibly be done
3576 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3577 * otherwise.
3578 *
3579 */
3580 int
3581 rf_buf_queue_check(RF_Raid_t *raidPtr)
3582 {
3583 struct raid_softc *rs;
3584 struct dk_softc *dksc;
3585
3586 rs = raidPtr->softc;
3587 dksc = &rs->sc_dksc;
3588
3589 if ((rs->sc_flags & RAIDF_INITED) == 0)
3590 return 1;
3591
3592 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3593 /* there is work to do */
3594 return 0;
3595 }
3596 /* default is nothing to do */
3597 return 1;
3598 }
3599
3600 int
3601 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3602 {
3603 uint64_t numsecs;
3604 unsigned secsize;
3605 int error;
3606
3607 error = getdisksize(vp, &numsecs, &secsize);
3608 if (error == 0) {
3609 diskPtr->blockSize = secsize;
3610 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3611 diskPtr->partitionSize = numsecs;
3612 return 0;
3613 }
3614 return error;
3615 }
3616
/* Autoconf match function: RAIDframe pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3622
/*
 * Autoconf attach function.  Intentionally empty: the real per-unit
 * setup happens elsewhere (presumably when the unit is configured) —
 * this stub only satisfies the autoconf interface.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3627
3628
3629 static int
3630 raid_detach(device_t self, int flags)
3631 {
3632 int error;
3633 struct raid_softc *rs = raidsoftc(self);
3634
3635 if (rs == NULL)
3636 return ENXIO;
3637
3638 if ((error = raidlock(rs)) != 0)
3639 return (error);
3640
3641 error = raid_detach_unlocked(rs);
3642
3643 raidunlock(rs);
3644
3645 /* XXX raid can be referenced here */
3646
3647 if (error)
3648 return error;
3649
3650 /* Free the softc */
3651 raidput(rs);
3652
3653 return 0;
3654 }
3655
/*
 * Publish a synthetic disk geometry for the RAID set via disk_set_info().
 * Sectors-per-track is one data stripe; the track count (4 * numCol) is
 * an arbitrary fabrication, as RAID sets have no physical geometry.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	/* NOTE(review): dg_ncylinders is left 0 by the memset above —
	   presumably disk_set_info() derives it; confirm. */
	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3671
3672 /*
3673 * Get cache info for all the components (including spares).
3674 * Returns intersection of all the cache flags of all disks, or first
3675 * error if any encountered.
3676 * XXXfua feature flags can change as spares are added - lock down somehow
3677 */
3678 static int
3679 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3680 {
3681 int c;
3682 int error;
3683 int dkwhole = 0, dkpart;
3684
3685 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3686 /*
3687 * Check any non-dead disk, even when currently being
3688 * reconstructed.
3689 */
3690 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3691 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3692 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3693 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3694 if (error) {
3695 if (error != ENODEV) {
3696 printf("raid%d: get cache for component %s failed\n",
3697 raidPtr->raidid,
3698 raidPtr->Disks[c].devname);
3699 }
3700
3701 return error;
3702 }
3703
3704 if (c == 0)
3705 dkwhole = dkpart;
3706 else
3707 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3708 }
3709 }
3710
3711 *data = dkwhole;
3712
3713 return 0;
3714 }
3715
3716 /*
3717 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3718 * We end up returning whatever error was returned by the first cache flush
3719 * that fails.
3720 */
3721
3722 int
3723 rf_sync_component_caches(RF_Raid_t *raidPtr)
3724 {
3725 int c, sparecol;
3726 int e,error;
3727 int force = 1;
3728
3729 error = 0;
3730 for (c = 0; c < raidPtr->numCol; c++) {
3731 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3732 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3733 &force, FWRITE, NOCRED);
3734 if (e) {
3735 if (e != ENODEV)
3736 printf("raid%d: cache flush to component %s failed.\n",
3737 raidPtr->raidid, raidPtr->Disks[c].devname);
3738 if (error == 0) {
3739 error = e;
3740 }
3741 }
3742 }
3743 }
3744
3745 for( c = 0; c < raidPtr->numSpare ; c++) {
3746 sparecol = raidPtr->numCol + c;
3747 /* Need to ensure that the reconstruct actually completed! */
3748 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3749 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3750 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3751 if (e) {
3752 if (e != ENODEV)
3753 printf("raid%d: cache flush to component %s failed.\n",
3754 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3755 if (error == 0) {
3756 error = e;
3757 }
3758 }
3759 }
3760 }
3761 return error;
3762 }
3763
3764 /* Fill in info with the current status */
3765 void
3766 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3767 {
3768
3769 if (raidPtr->status != rf_rs_reconstructing) {
3770 info->total = 100;
3771 info->completed = 100;
3772 } else {
3773 info->total = raidPtr->reconControl->numRUsTotal;
3774 info->completed = raidPtr->reconControl->numRUsComplete;
3775 }
3776 info->remaining = info->total - info->completed;
3777 }
3778
3779 /* Fill in info with the current status */
3780 void
3781 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3782 {
3783
3784 if (raidPtr->parity_rewrite_in_progress == 1) {
3785 info->total = raidPtr->Layout.numStripe;
3786 info->completed = raidPtr->parity_rewrite_stripes_done;
3787 } else {
3788 info->completed = 100;
3789 info->total = 100;
3790 }
3791 info->remaining = info->total - info->completed;
3792 }
3793
3794 /* Fill in info with the current status */
3795 void
3796 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3797 {
3798
3799 if (raidPtr->copyback_in_progress == 1) {
3800 info->total = raidPtr->Layout.numStripe;
3801 info->completed = raidPtr->copyback_stripes_done;
3802 info->remaining = info->total - info->completed;
3803 } else {
3804 info->remaining = 0;
3805 info->completed = 100;
3806 info->total = 100;
3807 }
3808 }
3809
3810 /* Fill in config with the current info */
3811 int
3812 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3813 {
3814 int d, i, j;
3815
3816 if (!raidPtr->valid)
3817 return (ENODEV);
3818 config->cols = raidPtr->numCol;
3819 config->ndevs = raidPtr->numCol;
3820 if (config->ndevs >= RF_MAX_DISKS)
3821 return (ENOMEM);
3822 config->nspares = raidPtr->numSpare;
3823 if (config->nspares >= RF_MAX_DISKS)
3824 return (ENOMEM);
3825 config->maxqdepth = raidPtr->maxQueueDepth;
3826 d = 0;
3827 for (j = 0; j < config->cols; j++) {
3828 config->devs[d] = raidPtr->Disks[j];
3829 d++;
3830 }
3831 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3832 config->spares[i] = raidPtr->Disks[j];
3833 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3834 /* XXX: raidctl(8) expects to see this as a used spare */
3835 config->spares[i].status = rf_ds_used_spare;
3836 }
3837 }
3838 return 0;
3839 }
3840
3841 int
3842 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3843 {
3844 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3845 RF_ComponentLabel_t *raid_clabel;
3846 int column = clabel->column;
3847
3848 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3849 return EINVAL;
3850 raid_clabel = raidget_component_label(raidPtr, column);
3851 memcpy(clabel, raid_clabel, sizeof *clabel);
3852
3853 return 0;
3854 }
3855
3856 /*
3857 * Module interface
3858 */
3859
3860 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3861
3862 #ifdef _MODULE
3863 CFDRIVER_DECL(raid, DV_DISK, NULL);
3864 #endif
3865
3866 static int raid_modcmd(modcmd_t, void *);
3867 static int raid_modcmd_init(void);
3868 static int raid_modcmd_fini(void);
3869
3870 static int
3871 raid_modcmd(modcmd_t cmd, void *data)
3872 {
3873 int error;
3874
3875 error = 0;
3876 switch (cmd) {
3877 case MODULE_CMD_INIT:
3878 error = raid_modcmd_init();
3879 break;
3880 case MODULE_CMD_FINI:
3881 error = raid_modcmd_fini();
3882 break;
3883 default:
3884 error = ENOTTY;
3885 break;
3886 }
3887 return error;
3888 }
3889
/*
 * Module initialization: set up the global raid lock, attach the device
 * switch and autoconf glue, boot the RAIDframe core, and register a
 * config finalizer that will auto-configure RAID sets once all real
 * hardware has been found.  On any attach failure, earlier attachments
 * are unwound in reverse order before returning the error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be attached. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Unwind the devsw attachment. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind in reverse order of attachment. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is always 0 here; every failure path returned above. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: sets simply won't auto-configure. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3960
/*
 * Module finalization: refuse to unload while any raid unit exists,
 * otherwise detach the autoconf glue and device switch (re-attaching
 * already-detached pieces if a later step fails), shut down the
 * RAIDframe core, and destroy the global lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back the cfattach detach done above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back both earlier detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4010