/*	$NetBSD: rf_netbsdkintf.c,v 1.397 2021/07/26 22:50:36 oster Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.397 2021/07/26 22:50:36 oster Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else /* DEBUG */
#define db1_printf(a) { }
#endif /* DEBUG */

#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

#define raidunit(x)	DISKUNIT(x)
#define raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;
	RF_ReconReqFlags_t flags;
	void *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
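
/*
 * A back-of-the-envelope version of that arithmetic (illustrative only;
 * these constants mirror the example above and are not used anywhere in
 * the driver):
 *
 *	size_t write_sz = 64 * 1024;		// one 64K write
 *	size_t per_req  = 2 * write_sz;		// old data + old parity,
 *						// parity buffer re-used
 *	size_t overhead = 10 * per_req;		// 1280K for 10 requests
 *	size_t total    = overhead + 10 * write_sz;	// plus 640K incoming
 */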

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;	/* AllocList */

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	sc = raidcreate(unit);
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
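
/*
 * Usage sketch for the unit list above (illustrative only, not part of
 * the driver): lookup-only paths pass create=false and must tolerate a
 * NULL return; paths that may instantiate a unit (e.g. first open) pass
 * create=true and balance a successful raidget() with raidput() once
 * the unit is torn down:
 *
 *	struct raid_softc *sc = raidget(unit, true);
 *	if (sc == NULL)
 *		return ENXIO;	// only possible for unit < 0
 *	...
 *	raidput(sc);		// unlink from the list and free
 */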

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		DPRINTF("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d",
		    __func__, booted_device, rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}
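
/*
 * Worked example (illustrative; assumes the customary
 * RF_PROTECTED_SECTORS reservation of 64 sectors): a dump aimed at
 * partition-relative block 0 of the RAID device is issued at block 64
 * of the underlying component's partition, skipping the reserved area
 * that holds the component label.
 */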

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!? */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;
}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;
}

static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
	switch (cmd) {
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_SET_ROOT:
		return (rs->sc_flags & RAIDF_INITED) == 0;
	}
	return false;
}

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
	/* allocate a buffer for the layout-specific data, and copy it in */
	if (k_cfg->layoutSpecificSize == 0)
		return 0;

	if (k_cfg->layoutSpecificSize > 10000) {
		/* sanity check */
		return EINVAL;
	}

	u_char *specific_buf;
	specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
	if (specific_buf == NULL)
		return ENOMEM;

	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
	    k_cfg->layoutSpecificSize);
	if (retcode) {
		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
		return retcode;
	}

	k_cfg->layoutSpecific = specific_buf;
	return 0;
}

static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit! */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}

int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}

#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif /* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	*/

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0;	/* we don't pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0;	/* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
	/*
	 * This makes no sense on a RAID 0, or if we are not reconstructing
	 * so tell the user it's done.
	 */
	if (raidPtr->Layout.map->faultsTolerated == 0 ||
	    raidPtr->status != rf_rs_reconstructing) {
		*data = 100;
		return 0;
	}
	if (raidPtr->reconControl->numRUsTotal == 0) {
		*data = 0;
		return 0;
	}
	*data = (raidPtr->reconControl->numRUsComplete * 100
	    / raidPtr->reconControl->numRUsTotal);
	return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	RF_SingleComponent_t *sparePtr, *componentPtr;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	if (rf_must_be_initialized(rs, cmd))
		return ENXIO;

	switch (cmd) {
		/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr, "raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		ucfgp = *(RF_DeviceConfig_t **)data;
		d_cfg = RF_Malloc(sizeof(*d_cfg));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return retcode;
#endif
	default:
		/*
		 * Don't bother trying to load compat modules
		 * if it is not our ioctl. This is more efficient
		 * and makes rump tests not depend on compat code
		 */
		if (IOCGROUP(cmd) != 'r')
			break;
#ifdef _LP64
		if ((l->l_proc->p_flag & PK_32) != 0) {
			module_autoload("compat_netbsd32_raid",
			    MODULE_CLASS_EXEC);
			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
			    (rs, cmd, data), enosys(), retcode);
			if (retcode != EPASSTHROUGH)
				return retcode;
		}
#endif
		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;

		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return EINVAL;

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return retcode;
}

/* raidinit -- complete the rest of the initialization for the
   RAIDframe device. */

static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
#endif
1879
1880 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1881 * bp & passes it down.
1882 * any calls originating in the kernel must use non-blocking I/O
1883 * do some extra sanity checking to return "appropriate" error values for
1884 * certain conditions (to make some standard utilities work)
1885 *
1886 * Formerly known as: rf_DoAccessKernel
1887 */
1888 void
1889 raidstart(RF_Raid_t *raidPtr)
1890 {
1891 struct raid_softc *rs;
1892 struct dk_softc *dksc;
1893
1894 rs = raidPtr->softc;
1895 dksc = &rs->sc_dksc;
1896 /* quick check to see if anything has died recently */
1897 rf_lock_mutex2(raidPtr->mutex);
1898 if (raidPtr->numNewFailures > 0) {
1899 rf_unlock_mutex2(raidPtr->mutex);
1900 rf_update_component_labels(raidPtr,
1901 RF_NORMAL_COMPONENT_UPDATE);
1902 rf_lock_mutex2(raidPtr->mutex);
1903 raidPtr->numNewFailures--;
1904 }
1905 rf_unlock_mutex2(raidPtr->mutex);
1906
1907 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1908 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1909 return;
1910 }
1911
1912 dk_start(dksc, NULL);
1913 }
1914
1915 static int
1916 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1917 {
1918 RF_SectorCount_t num_blocks, pb, sum;
1919 RF_RaidAddr_t raid_addr;
1920 daddr_t blocknum;
1921 int rc;
1922
1923 rf_lock_mutex2(raidPtr->mutex);
1924 if (raidPtr->openings == 0) {
1925 rf_unlock_mutex2(raidPtr->mutex);
1926 return EAGAIN;
1927 }
1928 rf_unlock_mutex2(raidPtr->mutex);
1929
1930 blocknum = bp->b_rawblkno;
1931
1932 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1933 (int) blocknum));
1934
1935 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1936 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1937
1938 /* *THIS* is where we adjust what block we're going to...
1939 * but DO NOT TOUCH bp->b_blkno!!! */
1940 raid_addr = blocknum;
1941
1942 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1943 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1944 sum = raid_addr + num_blocks + pb;
1945 	if (rf_debugKernelAccess) {
1946 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1947 (int) raid_addr, (int) sum, (int) num_blocks,
1948 (int) pb, (int) bp->b_resid));
1949 }
1950 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1951 || (sum < num_blocks) || (sum < pb)) {
1952 rc = ENOSPC;
1953 goto done;
1954 }
1955 /*
1956 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1957 */
1958
1959 if (bp->b_bcount & raidPtr->sectorMask) {
1960 rc = ENOSPC;
1961 goto done;
1962 }
1963 db1_printf(("Calling DoAccess..\n"));
1964
1965
1966 rf_lock_mutex2(raidPtr->mutex);
1967 raidPtr->openings--;
1968 rf_unlock_mutex2(raidPtr->mutex);
1969
1970 /* don't ever condition on bp->b_flags & B_WRITE.
1971 * always condition on B_READ instead */
1972
1973 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1974 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1975 raid_addr, num_blocks,
1976 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1977
1978 done:
1979 return rc;
1980 }
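
/*
 * Example (editor's illustrative sketch, not part of the driver): the
 * bounds test in raiddoaccess() above rejects both requests that run past
 * the end of the array and requests whose address arithmetic wrapped
 * around.  A self-contained restatement of the same test; all names here
 * are hypothetical:
 */
#if 0
#include <stdint.h>

static int
range_ok(uint64_t start, uint64_t nblocks, uint64_t partial,
    uint64_t total_sectors)
{
	uint64_t sum = start + nblocks + partial;

	/* A wrapped sum is smaller than at least one of its addends. */
	if (sum > total_sectors || sum < start ||
	    sum < nblocks || sum < partial)
		return 0;		/* reject, as raiddoaccess() does */
	return 1;
}
#endif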
1981
1982 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1983
1984 int
1985 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1986 {
1987 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1988 struct buf *bp;
1989
1990 req->queue = queue;
1991 bp = req->bp;
1992
1993 switch (req->type) {
1994 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1995 /* XXX need to do something extra here.. */
1996 /* I'm leaving this in, as I've never actually seen it used,
1997 * and I'd like folks to report it... GO */
1998 printf("%s: WAKEUP CALLED\n", __func__);
1999 queue->numOutstanding++;
2000
2001 bp->b_flags = 0;
2002 bp->b_private = req;
2003
2004 KernelWakeupFunc(bp);
2005 break;
2006
2007 case RF_IO_TYPE_READ:
2008 case RF_IO_TYPE_WRITE:
2009 #if RF_ACC_TRACE > 0
2010 if (req->tracerec) {
2011 RF_ETIMER_START(req->tracerec->timer);
2012 }
2013 #endif
2014 InitBP(bp, queue->rf_cinfo->ci_vp,
2015 op, queue->rf_cinfo->ci_dev,
2016 req->sectorOffset, req->numSector,
2017 req->buf, KernelWakeupFunc, (void *) req,
2018 queue->raidPtr->logBytesPerSector);
2019
2020 if (rf_debugKernelAccess) {
2021 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2022 (long) bp->b_blkno));
2023 }
2024 queue->numOutstanding++;
2025 queue->last_deq_sector = req->sectorOffset;
2026 /* acc wouldn't have been let in if there were any pending
2027 * reqs at any other priority */
2028 queue->curPriority = req->priority;
2029
2030 db1_printf(("Going for %c to unit %d col %d\n",
2031 req->type, queue->raidPtr->raidid,
2032 queue->col));
2033 db1_printf(("sector %d count %d (%d bytes) %d\n",
2034 (int) req->sectorOffset, (int) req->numSector,
2035 (int) (req->numSector <<
2036 queue->raidPtr->logBytesPerSector),
2037 (int) queue->raidPtr->logBytesPerSector));
2038
2039 /*
2040 * XXX: drop lock here since this can block at
2041 * least with backing SCSI devices. Retake it
2042 * to minimize fuss with calling interfaces.
2043 */
2044
2045 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2046 bdev_strategy(bp);
2047 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2048 break;
2049
2050 default:
2051 panic("bad req->type in rf_DispatchKernelIO");
2052 }
2053 db1_printf(("Exiting from DispatchKernelIO\n"));
2054
2055 return 0;
2056 }
2057 /* This is the callback function associated with an I/O invoked from
2058    kernel code.
2059  */
2060 static void
2061 KernelWakeupFunc(struct buf *bp)
2062 {
2063 RF_DiskQueueData_t *req = NULL;
2064 RF_DiskQueue_t *queue;
2065
2066 db1_printf(("recovering the request queue:\n"));
2067
2068 req = bp->b_private;
2069
2070 queue = (RF_DiskQueue_t *) req->queue;
2071
2072 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2073
2074 #if RF_ACC_TRACE > 0
2075 if (req->tracerec) {
2076 RF_ETIMER_STOP(req->tracerec->timer);
2077 RF_ETIMER_EVAL(req->tracerec->timer);
2078 rf_lock_mutex2(rf_tracing_mutex);
2079 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2080 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2081 req->tracerec->num_phys_ios++;
2082 rf_unlock_mutex2(rf_tracing_mutex);
2083 }
2084 #endif
2085
2086 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2087 * ballistic, and mark the component as hosed... */
2088
2089 if (bp->b_error != 0) {
2090 /* Mark the disk as dead */
2091 /* but only mark it once... */
2092 /* and only if it wouldn't leave this RAID set
2093 completely broken */
2094 if (((queue->raidPtr->Disks[queue->col].status ==
2095 rf_ds_optimal) ||
2096 (queue->raidPtr->Disks[queue->col].status ==
2097 rf_ds_used_spare)) &&
2098 (queue->raidPtr->numFailures <
2099 queue->raidPtr->Layout.map->faultsTolerated)) {
2100 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2101 queue->raidPtr->raidid,
2102 bp->b_error,
2103 queue->raidPtr->Disks[queue->col].devname);
2104 queue->raidPtr->Disks[queue->col].status =
2105 rf_ds_failed;
2106 queue->raidPtr->status = rf_rs_degraded;
2107 queue->raidPtr->numFailures++;
2108 queue->raidPtr->numNewFailures++;
2109 } else { /* Disk is already dead... */
2110 /* printf("Disk already marked as dead!\n"); */
2111 }
2112
2113 }
2114
2115 /* Fill in the error value */
2116 req->error = bp->b_error;
2117
2118 /* Drop this one on the "finished" queue... */
2119 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2120
2121 /* Let the raidio thread know there is work to be done. */
2122 rf_signal_cond2(queue->raidPtr->iodone_cv);
2123
2124 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2125 }
2126
2127
2128 /*
2129 * initialize a buf structure for doing an I/O in the kernel.
2130 */
2131 static void
2132 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2133 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2134 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2135 {
2136 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2137 bp->b_oflags = 0;
2138 bp->b_cflags = 0;
2139 bp->b_bcount = numSect << logBytesPerSector;
2140 bp->b_bufsize = bp->b_bcount;
2141 bp->b_error = 0;
2142 bp->b_dev = dev;
2143 bp->b_data = bf;
2144 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2145 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2146 if (bp->b_bcount == 0) {
2147 panic("bp->b_bcount is zero in InitBP!!");
2148 }
2149 bp->b_iodone = cbFunc;
2150 bp->b_private = cbArg;
2151 }
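
/*
 * Worked example (editor's note): InitBP() derives b_blkno by converting
 * the RAIDframe sector number to bytes and then to DEV_BSIZE units.  For a
 * hypothetical component with 4096-byte sectors (logBytesPerSector == 12)
 * and DEV_BSHIFT == 9:
 *
 *	startSect = 10
 *	bytes     = 10 << 12 = 40960
 *	b_blkno   = 40960 >> 9 = 80	(in 512-byte units)
 */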
2152
2153 /*
2154 * Wait interruptibly for an exclusive lock.
2155 *
2156 * XXX
2157 * Several drivers do this; it should be abstracted and made MP-safe.
2158 * (Hmm... where have we seen this warning before :-> GO )
2159 */
2160 static int
2161 raidlock(struct raid_softc *rs)
2162 {
2163 int error;
2164
2165 error = 0;
2166 mutex_enter(&rs->sc_mutex);
2167 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2168 rs->sc_flags |= RAIDF_WANTED;
2169 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2170 if (error != 0)
2171 goto done;
2172 }
2173 rs->sc_flags |= RAIDF_LOCKED;
2174 done:
2175 mutex_exit(&rs->sc_mutex);
2176 return error;
2177 }
2178 /*
2179 * Unlock and wake up any waiters.
2180 */
2181 static void
2182 raidunlock(struct raid_softc *rs)
2183 {
2184
2185 mutex_enter(&rs->sc_mutex);
2186 rs->sc_flags &= ~RAIDF_LOCKED;
2187 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2188 rs->sc_flags &= ~RAIDF_WANTED;
2189 cv_broadcast(&rs->sc_cv);
2190 }
2191 mutex_exit(&rs->sc_mutex);
2192 }
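
/*
 * Example (editor's illustrative sketch, not part of the driver): the same
 * LOCKED/WANTED flag protocol used by raidlock()/raidunlock(), restated
 * with userland pthreads to make the pattern explicit.  All names below
 * are hypothetical.
 */
#if 0
#include <pthread.h>

struct flag_lock {
	pthread_mutex_t	fl_mtx;
	pthread_cond_t	fl_cv;
	int		fl_flags;
#define	FL_LOCKED	0x01
#define	FL_WANTED	0x02
};

static void
flag_lock_acquire(struct flag_lock *fl)
{
	pthread_mutex_lock(&fl->fl_mtx);
	while (fl->fl_flags & FL_LOCKED) {
		fl->fl_flags |= FL_WANTED;
		pthread_cond_wait(&fl->fl_cv, &fl->fl_mtx);
	}
	fl->fl_flags |= FL_LOCKED;
	pthread_mutex_unlock(&fl->fl_mtx);
}

static void
flag_lock_release(struct flag_lock *fl)
{
	pthread_mutex_lock(&fl->fl_mtx);
	fl->fl_flags &= ~FL_LOCKED;
	if (fl->fl_flags & FL_WANTED) {
		fl->fl_flags &= ~FL_WANTED;
		pthread_cond_broadcast(&fl->fl_cv);
	}
	pthread_mutex_unlock(&fl->fl_mtx);
}
#endif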
2193
2194
2195 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2196 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2197 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2198
2199 static daddr_t
2200 rf_component_info_offset(void)
2201 {
2202
2203 return RF_COMPONENT_INFO_OFFSET;
2204 }
2205
2206 static daddr_t
2207 rf_component_info_size(unsigned secsize)
2208 {
2209 daddr_t info_size;
2210
2211 KASSERT(secsize);
2212 if (secsize > RF_COMPONENT_INFO_SIZE)
2213 info_size = secsize;
2214 else
2215 info_size = RF_COMPONENT_INFO_SIZE;
2216
2217 return info_size;
2218 }
2219
2220 static daddr_t
2221 rf_parity_map_offset(RF_Raid_t *raidPtr)
2222 {
2223 daddr_t map_offset;
2224
2225 KASSERT(raidPtr->bytesPerSector);
2226 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2227 map_offset = raidPtr->bytesPerSector;
2228 else
2229 map_offset = RF_COMPONENT_INFO_SIZE;
2230 map_offset += rf_component_info_offset();
2231
2232 return map_offset;
2233 }
2234
2235 static daddr_t
2236 rf_parity_map_size(RF_Raid_t *raidPtr)
2237 {
2238 daddr_t map_size;
2239
2240 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2241 map_size = raidPtr->bytesPerSector;
2242 else
2243 map_size = RF_PARITY_MAP_SIZE;
2244
2245 return map_size;
2246 }
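
/*
 * Worked example (editor's note): the helpers above round each on-disk
 * region up to at least one full sector.  For a hypothetical component
 * with 4096-byte sectors:
 *
 *	component info offset = 16384 bytes
 *	component info size   = max(4096, 1024)  = 4096 bytes
 *	parity map offset     = 16384 + 4096     = 20480 bytes
 *	parity map size       = max(4096, RF_PARITYMAP_NBYTE)
 */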
2247
2248 int
2249 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2250 {
2251 RF_ComponentLabel_t *clabel;
2252
2253 clabel = raidget_component_label(raidPtr, col);
2254 clabel->clean = RF_RAID_CLEAN;
2255 raidflush_component_label(raidPtr, col);
2256 return(0);
2257 }
2258
2259
2260 int
2261 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2262 {
2263 RF_ComponentLabel_t *clabel;
2264
2265 clabel = raidget_component_label(raidPtr, col);
2266 clabel->clean = RF_RAID_DIRTY;
2267 raidflush_component_label(raidPtr, col);
2268 return(0);
2269 }
2270
2271 int
2272 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2273 {
2274 KASSERT(raidPtr->bytesPerSector);
2275
2276 return raidread_component_label(raidPtr->bytesPerSector,
2277 raidPtr->Disks[col].dev,
2278 raidPtr->raid_cinfo[col].ci_vp,
2279 &raidPtr->raid_cinfo[col].ci_label);
2280 }
2281
2282 RF_ComponentLabel_t *
2283 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2284 {
2285 return &raidPtr->raid_cinfo[col].ci_label;
2286 }
2287
2288 int
2289 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2290 {
2291 RF_ComponentLabel_t *label;
2292
2293 label = &raidPtr->raid_cinfo[col].ci_label;
2294 label->mod_counter = raidPtr->mod_counter;
2295 #ifndef RF_NO_PARITY_MAP
2296 label->parity_map_modcount = label->mod_counter;
2297 #endif
2298 return raidwrite_component_label(raidPtr->bytesPerSector,
2299 raidPtr->Disks[col].dev,
2300 raidPtr->raid_cinfo[col].ci_vp, label);
2301 }
2302
2303 /*
2304  * Swap the label endianness.
2305  *
2306  * Everything in the component label is 4-byte-swapped except the version,
2307  * which is kept byte-swapped at all times; this indicates to the writer
2308  * that a swap is necessary.
2309  *
2310  * For reads it is expected that out_label == clabel, but writes expect
2311  * separate labels so that only the re-swapped label is written out to
2312  * disk, leaving the in-core copy swapped except for the version.
2313  *
2314  * Only swapping of label version 2 is supported.
2315  */
2316 static void
2317 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
2318 {
2319 int *in, *out, *in_last;
2320
2321 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
2322
2323 /* Don't swap the label, but do copy it. */
2324 out_label->version = clabel->version;
2325
2326 in = &clabel->serial_number;
2327 in_last = &clabel->future_use2[42];
2328 out = &out_label->serial_number;
2329
2330 for (; in < in_last; in++, out++)
2331 *out = bswap32(*in);
2332 }
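
/*
 * Example (editor's illustrative sketch, not part of the driver):
 * rf_swap_label() byte-swaps the label body as a flat run of 32-bit
 * words.  The helper below shows the word-wise swap in isolation;
 * my_bswap32() is a hypothetical stand-in for the system bswap32().
 */
#if 0
#include <stddef.h>
#include <stdint.h>

static uint32_t
my_bswap32(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0x0000ff00) |
	    ((x << 8) & 0x00ff0000) | (x << 24);
}

static void
swap_words(uint32_t *out, const uint32_t *in, size_t nwords)
{
	for (size_t i = 0; i < nwords; i++)
		out[i] = my_bswap32(in[i]);
}
#endif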
2333
2334 static int
2335 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2336 RF_ComponentLabel_t *clabel)
2337 {
2338 int error;
2339
2340 error = raidread_component_area(dev, b_vp, clabel,
2341 sizeof(RF_ComponentLabel_t),
2342 rf_component_info_offset(),
2343 rf_component_info_size(secsize));
2344
2345 if (error == 0 &&
2346 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2347 rf_swap_label(clabel, clabel);
2348 }
2349
2350 return error;
2351 }
2352
2353 /* ARGSUSED */
2354 static int
2355 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2356 size_t msize, daddr_t offset, daddr_t dsize)
2357 {
2358 struct buf *bp;
2359 int error;
2360
2361 /* XXX should probably ensure that we don't try to do this if
2362 someone has changed rf_protected_sectors. */
2363
2364 if (b_vp == NULL) {
2365 /* For whatever reason, this component is not valid.
2366 Don't try to read a component label from it. */
2367 return(EINVAL);
2368 }
2369
2370 /* get a block of the appropriate size... */
2371 bp = geteblk((int)dsize);
2372 bp->b_dev = dev;
2373
2374 /* get our ducks in a row for the read */
2375 bp->b_blkno = offset / DEV_BSIZE;
2376 bp->b_bcount = dsize;
2377 bp->b_flags |= B_READ;
2378 bp->b_resid = dsize;
2379
2380 bdev_strategy(bp);
2381 error = biowait(bp);
2382
2383 if (!error) {
2384 memcpy(data, bp->b_data, msize);
2385 }
2386
2387 brelse(bp, 0);
2388 return(error);
2389 }
2390
2391 static int
2392 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2393 RF_ComponentLabel_t *clabel)
2394 {
2395 RF_ComponentLabel_t *clabel_write = clabel;
2396 RF_ComponentLabel_t lclabel;
2397 int error;
2398
2399 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2400 clabel_write = &lclabel;
2401 rf_swap_label(clabel, clabel_write);
2402 }
2403 error = raidwrite_component_area(dev, b_vp, clabel_write,
2404 sizeof(RF_ComponentLabel_t),
2405 rf_component_info_offset(),
2406 rf_component_info_size(secsize), 0);
2407
2408 return error;
2409 }
2410
2411 /* ARGSUSED */
2412 static int
2413 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2414 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2415 {
2416 struct buf *bp;
2417 int error;
2418
2419 /* get a block of the appropriate size... */
2420 bp = geteblk((int)dsize);
2421 bp->b_dev = dev;
2422
2423 /* get our ducks in a row for the write */
2424 bp->b_blkno = offset / DEV_BSIZE;
2425 bp->b_bcount = dsize;
2426 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2427 bp->b_resid = dsize;
2428
2429 memset(bp->b_data, 0, dsize);
2430 memcpy(bp->b_data, data, msize);
2431
2432 bdev_strategy(bp);
2433 if (asyncp)
2434 return 0;
2435 error = biowait(bp);
2436 brelse(bp, 0);
2437 if (error) {
2439 		printf("Failed to write RAID component info!\n");
2441 }
2442
2443 return(error);
2444 }
2445
2446 void
2447 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2448 {
2449 int c;
2450
2451 for (c = 0; c < raidPtr->numCol; c++) {
2452 /* Skip dead disks. */
2453 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2454 continue;
2455 /* XXXjld: what if an error occurs here? */
2456 raidwrite_component_area(raidPtr->Disks[c].dev,
2457 raidPtr->raid_cinfo[c].ci_vp, map,
2458 RF_PARITYMAP_NBYTE,
2459 rf_parity_map_offset(raidPtr),
2460 rf_parity_map_size(raidPtr), 0);
2461 }
2462 }
2463
2464 void
2465 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2466 {
2467 struct rf_paritymap_ondisk tmp;
2468 int c,first;
2469
2470 first=1;
2471 for (c = 0; c < raidPtr->numCol; c++) {
2472 /* Skip dead disks. */
2473 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2474 continue;
2475 raidread_component_area(raidPtr->Disks[c].dev,
2476 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2477 RF_PARITYMAP_NBYTE,
2478 rf_parity_map_offset(raidPtr),
2479 rf_parity_map_size(raidPtr));
2480 if (first) {
2481 memcpy(map, &tmp, sizeof(*map));
2482 first = 0;
2483 } else {
2484 rf_paritymap_merge(map, &tmp);
2485 }
2486 }
2487 }
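
/*
 * Example (editor's illustrative sketch): rf_paritymap_kern_read() folds
 * the per-component maps together with rf_paritymap_merge(), which is
 * defined elsewhere in RAIDframe.  Assuming the merge is a union of the
 * dirty bits -- an editor's assumption, not confirmed here -- it amounts
 * to a word-wise OR:
 */
#if 0
#include <stddef.h>
#include <stdint.h>

static void
map_merge(uint32_t *acc, const uint32_t *in, size_t nwords)
{
	/* A region marked dirty on any component stays dirty. */
	for (size_t i = 0; i < nwords; i++)
		acc[i] |= in[i];
}
#endif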
2488
2489 void
2490 rf_markalldirty(RF_Raid_t *raidPtr)
2491 {
2492 RF_ComponentLabel_t *clabel;
2493 int sparecol;
2494 int c;
2495 int j;
2496 int scol = -1;
2497
2498 raidPtr->mod_counter++;
2499 for (c = 0; c < raidPtr->numCol; c++) {
2500 /* we don't want to touch (at all) a disk that has
2501 failed */
2502 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2503 clabel = raidget_component_label(raidPtr, c);
2504 if (clabel->status == rf_ds_spared) {
2505 /* XXX do something special...
2506 but whatever you do, don't
2507 try to access it!! */
2508 } else {
2509 raidmarkdirty(raidPtr, c);
2510 }
2511 }
2512 }
2513
2514 for( c = 0; c < raidPtr->numSpare ; c++) {
2515 sparecol = raidPtr->numCol + c;
2516 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2517 			/*
2518 			 * We claim this disk is "optimal" if it's
2519 			 * rf_ds_used_spare, as that means it should be
2520 			 * directly substitutable for the disk it replaced.
2521 			 * We note that too...
2522 			 */
2525
2526 for(j=0;j<raidPtr->numCol;j++) {
2527 if (raidPtr->Disks[j].spareCol == sparecol) {
2528 scol = j;
2529 break;
2530 }
2531 }
2532
2533 clabel = raidget_component_label(raidPtr, sparecol);
2534 /* make sure status is noted */
2535
2536 raid_init_component_label(raidPtr, clabel);
2537
2538 clabel->row = 0;
2539 clabel->column = scol;
2540 /* Note: we *don't* change status from rf_ds_used_spare
2541 to rf_ds_optimal */
2542 /* clabel.status = rf_ds_optimal; */
2543
2544 raidmarkdirty(raidPtr, sparecol);
2545 }
2546 }
2547 }
2548
2549
2550 void
2551 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2552 {
2553 RF_ComponentLabel_t *clabel;
2554 int sparecol;
2555 int c;
2556 int j;
2557 int scol;
2558 struct raid_softc *rs = raidPtr->softc;
2559
2560 scol = -1;
2561
2562 /* XXX should do extra checks to make sure things really are clean,
2563 rather than blindly setting the clean bit... */
2564
2565 raidPtr->mod_counter++;
2566
2567 for (c = 0; c < raidPtr->numCol; c++) {
2568 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2569 clabel = raidget_component_label(raidPtr, c);
2570 /* make sure status is noted */
2571 clabel->status = rf_ds_optimal;
2572
2573 /* note what unit we are configured as */
2574 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2575 clabel->last_unit = raidPtr->raidid;
2576
2577 raidflush_component_label(raidPtr, c);
2578 if (final == RF_FINAL_COMPONENT_UPDATE) {
2579 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2580 raidmarkclean(raidPtr, c);
2581 }
2582 }
2583 }
2584 /* else we don't touch it.. */
2585 }
2586
2587 for( c = 0; c < raidPtr->numSpare ; c++) {
2588 sparecol = raidPtr->numCol + c;
2589 /* Need to ensure that the reconstruct actually completed! */
2590 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2591 			/*
2592 			 * We claim this disk is "optimal" if it's
2593 			 * rf_ds_used_spare, as that means it should be
2594 			 * directly substitutable for the disk it replaced.
2595 			 * We note that too...
2596 			 */
2599
2600 for(j=0;j<raidPtr->numCol;j++) {
2601 if (raidPtr->Disks[j].spareCol == sparecol) {
2602 scol = j;
2603 break;
2604 }
2605 }
2606
2607 /* XXX shouldn't *really* need this... */
2608 clabel = raidget_component_label(raidPtr, sparecol);
2609 /* make sure status is noted */
2610
2611 raid_init_component_label(raidPtr, clabel);
2612
2613 clabel->column = scol;
2614 clabel->status = rf_ds_optimal;
2615 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2616 clabel->last_unit = raidPtr->raidid;
2617
2618 raidflush_component_label(raidPtr, sparecol);
2619 if (final == RF_FINAL_COMPONENT_UPDATE) {
2620 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2621 raidmarkclean(raidPtr, sparecol);
2622 }
2623 }
2624 }
2625 }
2626 }
2627
2628 void
2629 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2630 {
2631
2632 if (vp != NULL) {
2633 if (auto_configured == 1) {
2634 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2635 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2636 vput(vp);
2637
2638 } else {
2639 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2640 }
2641 }
2642 }
2643
2644
2645 void
2646 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2647 {
2648 int r,c;
2649 struct vnode *vp;
2650 int acd;
2651
2652
2653 /* We take this opportunity to close the vnodes like we should.. */
2654
2655 for (c = 0; c < raidPtr->numCol; c++) {
2656 vp = raidPtr->raid_cinfo[c].ci_vp;
2657 acd = raidPtr->Disks[c].auto_configured;
2658 rf_close_component(raidPtr, vp, acd);
2659 raidPtr->raid_cinfo[c].ci_vp = NULL;
2660 raidPtr->Disks[c].auto_configured = 0;
2661 }
2662
2663 for (r = 0; r < raidPtr->numSpare; r++) {
2664 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2665 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2666 rf_close_component(raidPtr, vp, acd);
2667 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2668 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2669 }
2670 }
2671
2672
2673 static void
2674 rf_ReconThread(struct rf_recon_req_internal *req)
2675 {
2676 int s;
2677 RF_Raid_t *raidPtr;
2678
2679 s = splbio();
2680 raidPtr = (RF_Raid_t *) req->raidPtr;
2681 raidPtr->recon_in_progress = 1;
2682
2683 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2684 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2685
2686 RF_Free(req, sizeof(*req));
2687
2688 raidPtr->recon_in_progress = 0;
2689 splx(s);
2690
2691 /* That's all... */
2692 kthread_exit(0); /* does not return */
2693 }
2694
2695 static void
2696 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2697 {
2698 int retcode;
2699 int s;
2700
2701 raidPtr->parity_rewrite_stripes_done = 0;
2702 raidPtr->parity_rewrite_in_progress = 1;
2703 s = splbio();
2704 retcode = rf_RewriteParity(raidPtr);
2705 splx(s);
2706 if (retcode) {
2707 printf("raid%d: Error re-writing parity (%d)!\n",
2708 raidPtr->raidid, retcode);
2709 } else {
2710 /* set the clean bit! If we shutdown correctly,
2711 the clean bit on each component label will get
2712 set */
2713 raidPtr->parity_good = RF_RAID_CLEAN;
2714 }
2715 raidPtr->parity_rewrite_in_progress = 0;
2716
2717 /* Anyone waiting for us to stop? If so, inform them... */
2718 if (raidPtr->waitShutdown) {
2719 rf_lock_mutex2(raidPtr->rad_lock);
2720 cv_broadcast(&raidPtr->parity_rewrite_cv);
2721 rf_unlock_mutex2(raidPtr->rad_lock);
2722 }
2723
2724 /* That's all... */
2725 kthread_exit(0); /* does not return */
2726 }
2727
2728
2729 static void
2730 rf_CopybackThread(RF_Raid_t *raidPtr)
2731 {
2732 int s;
2733
2734 raidPtr->copyback_in_progress = 1;
2735 s = splbio();
2736 rf_CopybackReconstructedData(raidPtr);
2737 splx(s);
2738 raidPtr->copyback_in_progress = 0;
2739
2740 /* That's all... */
2741 kthread_exit(0); /* does not return */
2742 }
2743
2744
2745 static void
2746 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2747 {
2748 int s;
2749 RF_Raid_t *raidPtr;
2750
2751 s = splbio();
2752 raidPtr = req->raidPtr;
2753 raidPtr->recon_in_progress = 1;
2754 rf_ReconstructInPlace(raidPtr, req->col);
2755 RF_Free(req, sizeof(*req));
2756 raidPtr->recon_in_progress = 0;
2757 splx(s);
2758
2759 /* That's all... */
2760 kthread_exit(0); /* does not return */
2761 }
2762
2763 static RF_AutoConfig_t *
2764 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2765 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2766 unsigned secsize)
2767 {
2768 int good_one = 0;
2769 RF_ComponentLabel_t *clabel;
2770 RF_AutoConfig_t *ac;
2771
2772 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2773
2774 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2775 /* Got the label. Does it look reasonable? */
2776 if (rf_reasonable_label(clabel, numsecs) &&
2777 (rf_component_label_partitionsize(clabel) <= size)) {
2778 #ifdef DEBUG
2779 printf("Component on: %s: %llu\n",
2780 cname, (unsigned long long)size);
2781 rf_print_component_label(clabel);
2782 #endif
2783 /* if it's reasonable, add it, else ignore it. */
2784 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2785 M_WAITOK);
2786 strlcpy(ac->devname, cname, sizeof(ac->devname));
2787 ac->dev = dev;
2788 ac->vp = vp;
2789 ac->clabel = clabel;
2790 ac->next = ac_list;
2791 ac_list = ac;
2792 good_one = 1;
2793 }
2794 }
2795 if (!good_one) {
2796 /* cleanup */
2797 free(clabel, M_RAIDFRAME);
2798 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2799 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2800 vput(vp);
2801 }
2802 return ac_list;
2803 }
2804
2805 static RF_AutoConfig_t *
2806 rf_find_raid_components(void)
2807 {
2808 struct vnode *vp;
2809 struct disklabel label;
2810 device_t dv;
2811 deviter_t di;
2812 dev_t dev;
2813 int bmajor, bminor, wedge, rf_part_found;
2814 int error;
2815 int i;
2816 RF_AutoConfig_t *ac_list;
2817 uint64_t numsecs;
2818 unsigned secsize;
2819 int dowedges;
2820
2821 /* initialize the AutoConfig list */
2822 ac_list = NULL;
2823
2824 	/*
2825 	 * We begin by trolling through *all* the devices on the system *twice*:
2826 	 * first we scan for wedges, second for other devices. This avoids
2827 	 * using a raw partition instead of a wedge that covers the whole disk.
2828 	 */
2829
2830 for (dowedges=1; dowedges>=0; --dowedges) {
2831 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2832 dv = deviter_next(&di)) {
2833
2834 /* we are only interested in disks */
2835 if (device_class(dv) != DV_DISK)
2836 continue;
2837
2838 /* we don't care about floppies */
2839 if (device_is_a(dv, "fd")) {
2840 continue;
2841 }
2842
2843 /* we don't care about CDs. */
2844 if (device_is_a(dv, "cd")) {
2845 continue;
2846 }
2847
2848 /* we don't care about md. */
2849 if (device_is_a(dv, "md")) {
2850 continue;
2851 }
2852
2853 /* hdfd is the Atari/Hades floppy driver */
2854 if (device_is_a(dv, "hdfd")) {
2855 continue;
2856 }
2857
2858 /* fdisa is the Atari/Milan floppy driver */
2859 if (device_is_a(dv, "fdisa")) {
2860 continue;
2861 }
2862
2863 /* we don't care about spiflash */
2864 if (device_is_a(dv, "spiflash")) {
2865 continue;
2866 }
2867
2868 /* are we in the wedges pass ? */
2869 wedge = device_is_a(dv, "dk");
2870 if (wedge != dowedges) {
2871 continue;
2872 }
2873
2874 /* need to find the device_name_to_block_device_major stuff */
2875 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2876
2877 			rf_part_found = 0; /* No raid partition as yet */
2878
2879 /* get a vnode for the raw partition of this disk */
2880 bminor = minor(device_unit(dv));
2881 dev = wedge ? makedev(bmajor, bminor) :
2882 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2883 if (bdevvp(dev, &vp))
2884 panic("RAID can't alloc vnode");
2885
2886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2887 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2888
2889 if (error) {
2890 				/* "Who cares."  Continue looking
2891 				   for something that exists */
2892 vput(vp);
2893 continue;
2894 }
2895
2896 error = getdisksize(vp, &numsecs, &secsize);
2897 if (error) {
2898 /*
2899 * Pseudo devices like vnd and cgd can be
2900 * opened but may still need some configuration.
2901 * Ignore these quietly.
2902 */
2903 if (error != ENXIO)
2904 printf("RAIDframe: can't get disk size"
2905 " for dev %s (%d)\n",
2906 device_xname(dv), error);
2907 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2908 vput(vp);
2909 continue;
2910 }
2911 if (wedge) {
2912 struct dkwedge_info dkw;
2913 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2914 NOCRED);
2915 if (error) {
2916 printf("RAIDframe: can't get wedge info for "
2917 "dev %s (%d)\n", device_xname(dv), error);
2918 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2919 vput(vp);
2920 continue;
2921 }
2922
2923 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2924 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2925 vput(vp);
2926 continue;
2927 }
2928
2929 VOP_UNLOCK(vp);
2930 ac_list = rf_get_component(ac_list, dev, vp,
2931 device_xname(dv), dkw.dkw_size, numsecs, secsize);
2932 				rf_part_found = 1; /* There is a raid component on this disk */
2933 continue;
2934 }
2935
2936 /* Ok, the disk exists. Go get the disklabel. */
2937 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
2938 if (error) {
2939 /*
2940 * XXX can't happen - open() would
2941 * have errored out (or faked up one)
2942 */
2943 if (error != ENOTTY)
2944 printf("RAIDframe: can't get label for dev "
2945 "%s (%d)\n", device_xname(dv), error);
2946 }
2947
2948 /* don't need this any more. We'll allocate it again
2949 a little later if we really do... */
2950 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2951 vput(vp);
2952
2953 if (error)
2954 continue;
2955
2956 			rf_part_found = 0; /* No raid partitions yet */
2957 for (i = 0; i < label.d_npartitions; i++) {
2958 char cname[sizeof(ac_list->devname)];
2959
2960 /* We only support partitions marked as RAID */
2961 if (label.d_partitions[i].p_fstype != FS_RAID)
2962 continue;
2963
2964 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2965 if (bdevvp(dev, &vp))
2966 panic("RAID can't alloc vnode");
2967
2968 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2969 error = VOP_OPEN(vp, FREAD, NOCRED);
2970 if (error) {
2971 /* Whatever... */
2972 vput(vp);
2973 continue;
2974 }
2975 VOP_UNLOCK(vp);
2976 snprintf(cname, sizeof(cname), "%s%c",
2977 device_xname(dv), 'a' + i);
2978 ac_list = rf_get_component(ac_list, dev, vp, cname,
2979 label.d_partitions[i].p_size, numsecs, secsize);
2980 				rf_part_found = 1; /* There is at least one raid partition on this disk */
2981 }
2982
2983 			/*
2984 			 * If there is no raid component on this disk, either in
2985 			 * a disklabel or inside a wedge, check the raw partition
2986 			 * as well, as it is possible to configure raid components
2987 			 * on raw disk devices.
2988 			 */
2989
2990 if (!rf_part_found) {
2991 char cname[sizeof(ac_list->devname)];
2992
2993 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2994 if (bdevvp(dev, &vp))
2995 panic("RAID can't alloc vnode");
2996
2997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2998
2999 error = VOP_OPEN(vp, FREAD, NOCRED);
3000 if (error) {
3001 /* Whatever... */
3002 vput(vp);
3003 continue;
3004 }
3005 VOP_UNLOCK(vp);
3006 snprintf(cname, sizeof(cname), "%s%c",
3007 device_xname(dv), 'a' + RAW_PART);
3008 ac_list = rf_get_component(ac_list, dev, vp, cname,
3009 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3010 }
3011 }
3012 deviter_release(&di);
3013 }
3014 return ac_list;
3015 }
3016
3017 int
3018 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3019 {
3020
3021 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
3022 clabel->version==RF_COMPONENT_LABEL_VERSION ||
3023 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
3024 (clabel->clean == RF_RAID_CLEAN ||
3025 clabel->clean == RF_RAID_DIRTY) &&
3026 clabel->row >=0 &&
3027 clabel->column >= 0 &&
3028 clabel->num_rows > 0 &&
3029 clabel->num_columns > 0 &&
3030 clabel->row < clabel->num_rows &&
3031 clabel->column < clabel->num_columns &&
3032 clabel->blockSize > 0 &&
3033 /*
3034 * numBlocksHi may contain garbage, but it is ok since
3035 * the type is unsigned. If it is really garbage,
3036 * rf_fix_old_label_size() will fix it.
3037 */
3038 rf_component_label_numblocks(clabel) > 0) {
3039 /*
3040 * label looks reasonable enough...
3041 * let's make sure it has no old garbage.
3042 */
3043 if (numsecs)
3044 rf_fix_old_label_size(clabel, numsecs);
3045 return(1);
3046 }
3047 return(0);
3048 }
3049
3050
3051 /*
3052 * For reasons yet unknown, some old component labels have garbage in
3053 * the newer numBlocksHi region, and this causes lossage. Since those
3054 * disks will also have numsecs set to less than 32 bits of sectors,
3055 * we can determine when this corruption has occurred, and fix it.
3056 *
3057 * The exact same problem, with the same unknown reason, happens to
3058 * the partitionSizeHi member as well.
3059 */
3060 static void
3061 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3062 {
3063
3064 if (numsecs < ((uint64_t)1 << 32)) {
3065 if (clabel->numBlocksHi) {
3066 printf("WARNING: total sectors < 32 bits, yet "
3067 "numBlocksHi set\n"
3068 "WARNING: resetting numBlocksHi to zero.\n");
3069 clabel->numBlocksHi = 0;
3070 }
3071
3072 if (clabel->partitionSizeHi) {
3073 printf("WARNING: total sectors < 32 bits, yet "
3074 "partitionSizeHi set\n"
3075 "WARNING: resetting partitionSizeHi to zero.\n");
3076 clabel->partitionSizeHi = 0;
3077 }
3078 }
3079 }
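
/*
 * Example (editor's illustrative sketch): the label stores 64-bit counts
 * split into 32-bit hi/lo halves.  Assuming the accessors combine them in
 * the obvious way (an editor's assumption), garbage in the hi word
 * inflates the count by a multiple of 2^32, which is why clearing it is
 * safe once the disk is known to be smaller than 2^32 sectors:
 */
#if 0
#include <stdint.h>

static uint64_t
combine_hi_lo(uint32_t hi, uint32_t lo)
{
	return ((uint64_t)hi << 32) | lo;
}
#endif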
3080
3081
3082 #ifdef DEBUG
3083 void
3084 rf_print_component_label(RF_ComponentLabel_t *clabel)
3085 {
3086 uint64_t numBlocks;
3087 static const char *rp[] = {
3088 "No", "Force", "Soft", "*invalid*"
3089 };
3090
3091
3092 numBlocks = rf_component_label_numblocks(clabel);
3093
3094 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3095 clabel->row, clabel->column,
3096 clabel->num_rows, clabel->num_columns);
3097 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3098 clabel->version, clabel->serial_number,
3099 clabel->mod_counter);
3100 printf(" Clean: %s Status: %d\n",
3101 clabel->clean ? "Yes" : "No", clabel->status);
3102 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3103 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3104 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3105 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3106 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3107 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3108 printf(" Last configured as: raid%d\n", clabel->last_unit);
3109 #if 0
3110 printf(" Config order: %d\n", clabel->config_order);
3111 #endif
3112
3113 }
3114 #endif
3115
3116 static RF_ConfigSet_t *
3117 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3118 {
3119 RF_AutoConfig_t *ac;
3120 RF_ConfigSet_t *config_sets;
3121 RF_ConfigSet_t *cset;
3122 RF_AutoConfig_t *ac_next;
3123
3124
3125 config_sets = NULL;
3126
3127 /* Go through the AutoConfig list, and figure out which components
3128 belong to what sets. */
3129 ac = ac_list;
3130 while(ac!=NULL) {
3131 /* we're going to putz with ac->next, so save it here
3132 for use at the end of the loop */
3133 ac_next = ac->next;
3134
3135 if (config_sets == NULL) {
3136 /* will need at least this one... */
3137 config_sets = malloc(sizeof(RF_ConfigSet_t),
3138 M_RAIDFRAME, M_WAITOK);
3139 /* this one is easy :) */
3140 config_sets->ac = ac;
3141 config_sets->next = NULL;
3142 config_sets->rootable = 0;
3143 ac->next = NULL;
3144 } else {
3145 /* which set does this component fit into? */
3146 cset = config_sets;
3147 while(cset!=NULL) {
3148 if (rf_does_it_fit(cset, ac)) {
3149 /* looks like it matches... */
3150 ac->next = cset->ac;
3151 cset->ac = ac;
3152 break;
3153 }
3154 cset = cset->next;
3155 }
3156 if (cset==NULL) {
3157 /* didn't find a match above... new set..*/
3158 cset = malloc(sizeof(RF_ConfigSet_t),
3159 M_RAIDFRAME, M_WAITOK);
3160 cset->ac = ac;
3161 ac->next = NULL;
3162 cset->next = config_sets;
3163 cset->rootable = 0;
3164 config_sets = cset;
3165 }
3166 }
3167 ac = ac_next;
3168 }
3169
3170
3171 return(config_sets);
3172 }
3173
3174 static int
3175 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3176 {
3177 RF_ComponentLabel_t *clabel1, *clabel2;
3178
3179 /* If this one matches the *first* one in the set, that's good
3180 enough, since the other members of the set would have been
3181 through here too... */
3182 /* note that we are not checking partitionSize here..
3183
3184 Note that we are also not checking the mod_counters here.
3185 If everything else matches except the mod_counter, that's
3186 good enough for this test. We will deal with the mod_counters
3187 a little later in the autoconfiguration process.
3188
3189 (clabel1->mod_counter == clabel2->mod_counter) &&
3190
3191 The reason we don't check for this is that failed disks
3192 will have lower modification counts. If those disks are
3193 not added to the set they used to belong to, then they will
3194 form their own set, which may result in 2 different sets,
3195 for example, competing to be configured at raid0, and
3196 perhaps competing to be the root filesystem set. If the
3197 wrong ones get configured, or both attempt to become /,
3198 	   weird behaviour and/or serious lossage will occur. Thus we
3199 need to bring them into the fold here, and kick them out at
3200 a later point.
3201
3202 */
3203
3204 clabel1 = cset->ac->clabel;
3205 clabel2 = ac->clabel;
3206 if ((clabel1->version == clabel2->version) &&
3207 (clabel1->serial_number == clabel2->serial_number) &&
3208 (clabel1->num_rows == clabel2->num_rows) &&
3209 (clabel1->num_columns == clabel2->num_columns) &&
3210 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3211 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3212 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3213 (clabel1->parityConfig == clabel2->parityConfig) &&
3214 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3215 (clabel1->blockSize == clabel2->blockSize) &&
3216 rf_component_label_numblocks(clabel1) ==
3217 rf_component_label_numblocks(clabel2) &&
3218 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3219 (clabel1->root_partition == clabel2->root_partition) &&
3220 (clabel1->last_unit == clabel2->last_unit) &&
3221 (clabel1->config_order == clabel2->config_order)) {
3222 		/* if it gets here, it almost *has* to be a match */
3223 } else {
3224 /* it's not consistent with somebody in the set..
3225 punt */
3226 return(0);
3227 }
3228 /* all was fine.. it must fit... */
3229 return(1);
3230 }
3231
3232 static int
3233 rf_have_enough_components(RF_ConfigSet_t *cset)
3234 {
3235 RF_AutoConfig_t *ac;
3236 RF_AutoConfig_t *auto_config;
3237 RF_ComponentLabel_t *clabel;
3238 int c;
3239 int num_cols;
3240 int num_missing;
3241 int mod_counter;
3242 int mod_counter_found;
3243 int even_pair_failed;
3244 char parity_type;
3245
3246
3247 /* check to see that we have enough 'live' components
3248 of this set. If so, we can configure it if necessary */
3249
3250 num_cols = cset->ac->clabel->num_columns;
3251 parity_type = cset->ac->clabel->parityConfig;
3252
3253 /* XXX Check for duplicate components!?!?!? */
3254
3255 /* Determine what the mod_counter is supposed to be for this set. */
3256
3257 mod_counter_found = 0;
3258 mod_counter = 0;
3259 ac = cset->ac;
3260 while(ac!=NULL) {
3261 if (mod_counter_found==0) {
3262 mod_counter = ac->clabel->mod_counter;
3263 mod_counter_found = 1;
3264 } else {
3265 if (ac->clabel->mod_counter > mod_counter) {
3266 mod_counter = ac->clabel->mod_counter;
3267 }
3268 }
3269 ac = ac->next;
3270 }
3271
3272 num_missing = 0;
3273 auto_config = cset->ac;
3274
3275 even_pair_failed = 0;
3276 for(c=0; c<num_cols; c++) {
3277 ac = auto_config;
3278 while(ac!=NULL) {
3279 if ((ac->clabel->column == c) &&
3280 (ac->clabel->mod_counter == mod_counter)) {
3281 /* it's this one... */
3282 #ifdef DEBUG
3283 printf("Found: %s at %d\n",
3284 ac->devname,c);
3285 #endif
3286 break;
3287 }
3288 ac=ac->next;
3289 }
3290 if (ac==NULL) {
3291 /* Didn't find one here! */
3292 /* special case for RAID 1, especially
3293 where there are more than 2
3294 components (where RAIDframe treats
3295 things a little differently :( ) */
3296 if (parity_type == '1') {
3297 if (c%2 == 0) { /* even component */
3298 even_pair_failed = 1;
3299 } else { /* odd component. If
3300 we're failed, and
3301 so is the even
3302 component, it's
3303 "Good Night, Charlie" */
3304 if (even_pair_failed == 1) {
3305 return(0);
3306 }
3307 }
3308 } else {
3309 /* normal accounting */
3310 num_missing++;
3311 }
3312 }
3313 if ((parity_type == '1') && (c%2 == 1)) {
3314 			/* Just did the odd component of a pair, and we
3315 			   didn't bail.. reset the even_pair_failed flag,
3316 			   and go on to the next pair.... */
3317 even_pair_failed = 0;
3318 }
3319 }
3320
3321 clabel = cset->ac->clabel;
3322
3323 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3324 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3325 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3326 /* XXX this needs to be made *much* more general */
3327 /* Too many failures */
3328 return(0);
3329 }
3330 /* otherwise, all is well, and we've got enough to take a kick
3331 at autoconfiguring this set */
3332 return(1);
3333 }
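
/*
 * Example (editor's illustrative sketch, not part of the driver): the
 * RAID 1 accounting above treats components (0,1), (2,3), ... as mirror
 * pairs; the set only becomes unconfigurable when both members of some
 * pair are missing.  A compact restatement over a hypothetical "present"
 * array:
 */
#if 0
static int
raid1_survives(const int *present, int num_cols)
{
	int c;

	for (c = 0; c + 1 < num_cols; c += 2) {
		if (!present[c] && !present[c + 1])
			return 0;	/* both halves of a pair are gone */
	}
	return 1;
}
#endif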
3334
3335 static void
3336 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3337 RF_Raid_t *raidPtr)
3338 {
3339 RF_ComponentLabel_t *clabel;
3340 int i;
3341
3342 clabel = ac->clabel;
3343
3344 /* 1. Fill in the common stuff */
3345 config->numCol = clabel->num_columns;
3346 config->numSpare = 0; /* XXX should this be set here? */
3347 config->sectPerSU = clabel->sectPerSU;
3348 config->SUsPerPU = clabel->SUsPerPU;
3349 config->SUsPerRU = clabel->SUsPerRU;
3350 config->parityConfig = clabel->parityConfig;
3351 /* XXX... */
3352 strcpy(config->diskQueueType,"fifo");
3353 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3354 config->layoutSpecificSize = 0; /* XXX ?? */
3355
3356 while(ac!=NULL) {
3357 /* row/col values will be in range due to the checks
3358 in reasonable_label() */
3359 strcpy(config->devnames[0][ac->clabel->column],
3360 ac->devname);
3361 ac = ac->next;
3362 }
3363
3364 for(i=0;i<RF_MAXDBGV;i++) {
3365 config->debugVars[i][0] = 0;
3366 }
3367 }
3368
3369 static int
3370 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3371 {
3372 RF_ComponentLabel_t *clabel;
3373 int column;
3374 int sparecol;
3375
3376 raidPtr->autoconfigure = new_value;
3377
3378 for(column=0; column<raidPtr->numCol; column++) {
3379 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3380 clabel = raidget_component_label(raidPtr, column);
3381 clabel->autoconfigure = new_value;
3382 raidflush_component_label(raidPtr, column);
3383 }
3384 }
3385 for(column = 0; column < raidPtr->numSpare ; column++) {
3386 sparecol = raidPtr->numCol + column;
3387 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3388 clabel = raidget_component_label(raidPtr, sparecol);
3389 clabel->autoconfigure = new_value;
3390 raidflush_component_label(raidPtr, sparecol);
3391 }
3392 }
3393 return(new_value);
3394 }
3395
3396 static int
3397 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3398 {
3399 RF_ComponentLabel_t *clabel;
3400 int column;
3401 int sparecol;
3402
3403 raidPtr->root_partition = new_value;
3404 for(column=0; column<raidPtr->numCol; column++) {
3405 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3406 clabel = raidget_component_label(raidPtr, column);
3407 clabel->root_partition = new_value;
3408 raidflush_component_label(raidPtr, column);
3409 }
3410 }
3411 for(column = 0; column < raidPtr->numSpare ; column++) {
3412 sparecol = raidPtr->numCol + column;
3413 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3414 clabel = raidget_component_label(raidPtr, sparecol);
3415 clabel->root_partition = new_value;
3416 raidflush_component_label(raidPtr, sparecol);
3417 }
3418 }
3419 return(new_value);
3420 }
3421
3422 static void
3423 rf_release_all_vps(RF_ConfigSet_t *cset)
3424 {
3425 RF_AutoConfig_t *ac;
3426
3427 ac = cset->ac;
3428 while(ac!=NULL) {
3429 /* Close the vp, and give it back */
3430 if (ac->vp) {
3431 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3432 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3433 vput(ac->vp);
3434 ac->vp = NULL;
3435 }
3436 ac = ac->next;
3437 }
3438 }
3439
3440
3441 static void
3442 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3443 {
3444 RF_AutoConfig_t *ac;
3445 RF_AutoConfig_t *next_ac;
3446
3447 ac = cset->ac;
3448 while(ac!=NULL) {
3449 next_ac = ac->next;
3450 /* nuke the label */
3451 free(ac->clabel, M_RAIDFRAME);
3452 /* cleanup the config structure */
3453 free(ac, M_RAIDFRAME);
3454 /* "next.." */
3455 ac = next_ac;
3456 }
3457 /* and, finally, nuke the config set */
3458 free(cset, M_RAIDFRAME);
3459 }
3460
3461
3462 void
3463 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3464 {
3465 /* avoid over-writing byteswapped version. */
3466 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
3467 clabel->version = RF_COMPONENT_LABEL_VERSION;
3468 clabel->serial_number = raidPtr->serial_number;
3469 clabel->mod_counter = raidPtr->mod_counter;
3470
3471 clabel->num_rows = 1;
3472 clabel->num_columns = raidPtr->numCol;
3473 clabel->clean = RF_RAID_DIRTY; /* not clean */
3474 clabel->status = rf_ds_optimal; /* "It's good!" */
3475
3476 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3477 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3478 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3479
3480 clabel->blockSize = raidPtr->bytesPerSector;
3481 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3482
3483 /* XXX not portable */
3484 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3485 clabel->maxOutstanding = raidPtr->maxOutstanding;
3486 clabel->autoconfigure = raidPtr->autoconfigure;
3487 clabel->root_partition = raidPtr->root_partition;
3488 clabel->last_unit = raidPtr->raidid;
3489 clabel->config_order = raidPtr->config_order;
3490
3491 #ifndef RF_NO_PARITY_MAP
3492 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3493 #endif
3494 }
3495
3496 static struct raid_softc *
3497 rf_auto_config_set(RF_ConfigSet_t *cset)
3498 {
3499 RF_Raid_t *raidPtr;
3500 RF_Config_t *config;
3501 int raidID;
3502 struct raid_softc *sc;
3503
3504 #ifdef DEBUG
3505 printf("RAID autoconfigure\n");
3506 #endif
3507
3508 /* 1. Create a config structure */
3509 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3510
3511 /*
3512 	   2. Figure out what RAID ID this one is supposed to live at.
3513 	   See if we can get the same RAID dev that it was configured
3514 	   on last time..
3515 */
3516
3517 raidID = cset->ac->clabel->last_unit;
3518 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3519 sc = raidget(++raidID, false))
3520 continue;
3521 #ifdef DEBUG
3522 printf("Configuring raid%d:\n",raidID);
3523 #endif
3524
3525 if (sc == NULL)
3526 sc = raidget(raidID, true);
3527 raidPtr = &sc->sc_r;
3528
3529 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3530 raidPtr->softc = sc;
3531 raidPtr->raidid = raidID;
3532 raidPtr->openings = RAIDOUTSTANDING;
3533
3534 /* 3. Build the configuration structure */
3535 rf_create_configuration(cset->ac, config, raidPtr);
3536
3537 /* 4. Do the configuration */
3538 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3539 raidinit(sc);
3540
3541 rf_markalldirty(raidPtr);
3542 raidPtr->autoconfigure = 1; /* XXX do this here? */
3543 switch (cset->ac->clabel->root_partition) {
3544 case 1: /* Force Root */
3545 case 2: /* Soft Root: root when boot partition part of raid */
3546 /*
3547 * everything configured just fine. Make a note
3548 * that this set is eligible to be root,
3549 * or forced to be root
3550 */
3551 cset->rootable = cset->ac->clabel->root_partition;
3552 /* XXX do this here? */
3553 raidPtr->root_partition = cset->rootable;
3554 break;
3555 default:
3556 break;
3557 }
3558 } else {
3559 raidput(sc);
3560 sc = NULL;
3561 }
3562
3563 /* 5. Cleanup */
3564 free(config, M_RAIDFRAME);
3565 return sc;
3566 }
3567
3568 void
3569 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3570 size_t xmin, size_t xmax)
3571 {
3572
3573 /* Format: raid%d_foo */
3574 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3575
3576 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3577 pool_sethiwat(p, xmax);
3578 pool_prime(p, xmin);
3579 }
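
/*
 * Hypothetical usage sketch (editor's note): the caller owns the w_chan
 * buffer, which backs the pool's wait channel and so must live as long as
 * the pool itself.  The field and constant names below are illustrative
 * only:
 *
 *	rf_pool_init(raidPtr, raidPtr->poolNames.dagh, &raidPtr->pools.dagh,
 *	    sizeof(RF_DagHeader_t), "dagh", MIN_FREE_DAGH, MAX_FREE_DAGH);
 */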
3580
3581
3582 /*
3583  * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
3584 * to see if there is IO pending and if that IO could possibly be done
3585 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3586 * otherwise.
3587 *
3588 */
3589 int
3590 rf_buf_queue_check(RF_Raid_t *raidPtr)
3591 {
3592 struct raid_softc *rs;
3593 struct dk_softc *dksc;
3594
3595 rs = raidPtr->softc;
3596 dksc = &rs->sc_dksc;
3597
3598 if ((rs->sc_flags & RAIDF_INITED) == 0)
3599 return 1;
3600
3601 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3602 /* there is work to do */
3603 return 0;
3604 }
3605 /* default is nothing to do */
3606 return 1;
3607 }
3608
3609 int
3610 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3611 {
3612 uint64_t numsecs;
3613 unsigned secsize;
3614 int error;
3615
3616 error = getdisksize(vp, &numsecs, &secsize);
3617 if (error == 0) {
3618 diskPtr->blockSize = secsize;
3619 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3620 diskPtr->partitionSize = numsecs;
3621 return 0;
3622 }
3623 return error;
3624 }
3625
3626 static int
3627 raid_match(device_t self, cfdata_t cfdata, void *aux)
3628 {
3629 return 1;
3630 }
3631
3632 static void
3633 raid_attach(device_t parent, device_t self, void *aux)
3634 {
3635 }
3636
3637
3638 static int
3639 raid_detach(device_t self, int flags)
3640 {
3641 int error;
3642 struct raid_softc *rs = raidsoftc(self);
3643
3644 if (rs == NULL)
3645 return ENXIO;
3646
3647 if ((error = raidlock(rs)) != 0)
3648 return error;
3649
3650 error = raid_detach_unlocked(rs);
3651
3652 raidunlock(rs);
3653
3654 /* XXX raid can be referenced here */
3655
3656 if (error)
3657 return error;
3658
3659 /* Free the softc */
3660 raidput(rs);
3661
3662 return 0;
3663 }
3664
3665 static void
3666 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3667 {
3668 struct dk_softc *dksc = &rs->sc_dksc;
3669 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3670
3671 memset(dg, 0, sizeof(*dg));
3672
3673 dg->dg_secperunit = raidPtr->totalSectors;
3674 dg->dg_secsize = raidPtr->bytesPerSector;
3675 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3676 dg->dg_ntracks = 4 * raidPtr->numCol;
3677
3678 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3679 }
3680
3681 /*
3682 * Get cache info for all the components (including spares).
3683 * Returns intersection of all the cache flags of all disks, or first
3684 * error if any encountered.
3685 * XXXfua feature flags can change as spares are added - lock down somehow
3686 */
3687 static int
3688 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3689 {
3690 int c;
3691 int error;
3692 int dkwhole = 0, dkpart;
3693
3694 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3695 /*
3696 * Check any non-dead disk, even when currently being
3697 * reconstructed.
3698 */
3699 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3700 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3701 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3702 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3703 if (error) {
3704 if (error != ENODEV) {
3705 printf("raid%d: get cache for component %s failed\n",
3706 raidPtr->raidid,
3707 raidPtr->Disks[c].devname);
3708 }
3709
3710 return error;
3711 }
3712
3713 if (c == 0)
3714 dkwhole = dkpart;
3715 else
3716 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3717 }
3718 }
3719
3720 *data = dkwhole;
3721
3722 return 0;
3723 }
3724
3725 /*
3726 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3727 * We end up returning whatever error was returned by the first cache flush
3728 * that fails.
3729 */
3730
3731 static int
3732 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3733 {
3734 int e = 0;
3735 for (int i = 0; i < 5; i++) {
3736 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3737 &force, FWRITE, NOCRED);
3738 if (!e || e == ENODEV)
3739 return e;
3740 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3741 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3742 }
3743 return e;
3744 }
3745
3746 int
3747 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3748 {
3749 int c, error;
3750
3751 error = 0;
3752 for (c = 0; c < raidPtr->numCol; c++) {
3753 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3754 int e = rf_sync_component_cache(raidPtr, c, force);
3755 if (e && !error)
3756 error = e;
3757 }
3758 }
3759
3760 for (c = 0; c < raidPtr->numSpare ; c++) {
3761 int sparecol = raidPtr->numCol + c;
3762 /* Need to ensure that the reconstruct actually completed! */
3763 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3764 int e = rf_sync_component_cache(raidPtr, sparecol,
3765 force);
3766 if (e && !error)
3767 error = e;
3768 }
3769 }
3770 return error;
3771 }
3772
3773 /* Fill in info with the current status */
3774 void
3775 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3776 {
3777
3778 if (raidPtr->status != rf_rs_reconstructing) {
3779 info->total = 100;
3780 info->completed = 100;
3781 } else {
3782 info->total = raidPtr->reconControl->numRUsTotal;
3783 info->completed = raidPtr->reconControl->numRUsComplete;
3784 }
3785 info->remaining = info->total - info->completed;
3786 }
3787
3788 /* Fill in info with the current status */
3789 void
3790 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3791 {
3792
3793 if (raidPtr->parity_rewrite_in_progress == 1) {
3794 info->total = raidPtr->Layout.numStripe;
3795 info->completed = raidPtr->parity_rewrite_stripes_done;
3796 } else {
3797 info->completed = 100;
3798 info->total = 100;
3799 }
3800 info->remaining = info->total - info->completed;
3801 }
3802
3803 /* Fill in info with the current status */
3804 void
3805 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3806 {
3807
3808 if (raidPtr->copyback_in_progress == 1) {
3809 info->total = raidPtr->Layout.numStripe;
3810 info->completed = raidPtr->copyback_stripes_done;
3811 info->remaining = info->total - info->completed;
3812 } else {
3813 info->remaining = 0;
3814 info->completed = 100;
3815 info->total = 100;
3816 }
3817 }
3818
3819 /* Fill in config with the current info */
3820 int
3821 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3822 {
3823 int d, i, j;
3824
3825 if (!raidPtr->valid)
3826 return ENODEV;
3827 config->cols = raidPtr->numCol;
3828 config->ndevs = raidPtr->numCol;
3829 if (config->ndevs >= RF_MAX_DISKS)
3830 return ENOMEM;
3831 config->nspares = raidPtr->numSpare;
3832 if (config->nspares >= RF_MAX_DISKS)
3833 return ENOMEM;
3834 config->maxqdepth = raidPtr->maxQueueDepth;
3835 d = 0;
3836 for (j = 0; j < config->cols; j++) {
3837 config->devs[d] = raidPtr->Disks[j];
3838 d++;
3839 }
3840 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3841 config->spares[i] = raidPtr->Disks[j];
3842 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3843 /* XXX: raidctl(8) expects to see this as a used spare */
3844 config->spares[i].status = rf_ds_used_spare;
3845 }
3846 }
3847 return 0;
3848 }
3849
3850 int
3851 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3852 {
3853 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3854 RF_ComponentLabel_t *raid_clabel;
3855 int column = clabel->column;
3856
3857 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3858 return EINVAL;
3859 raid_clabel = raidget_component_label(raidPtr, column);
3860 memcpy(clabel, raid_clabel, sizeof *clabel);
3861 /* Fix-up for userland. */
3862 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
3863 clabel->version = RF_COMPONENT_LABEL_VERSION;
3864
3865 return 0;
3866 }
3867
3868 /*
3869 * Module interface
3870 */
3871
3872 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3873
3874 #ifdef _MODULE
3875 CFDRIVER_DECL(raid, DV_DISK, NULL);
3876 #endif
3877
3878 static int raid_modcmd(modcmd_t, void *);
3879 static int raid_modcmd_init(void);
3880 static int raid_modcmd_fini(void);
3881
3882 static int
3883 raid_modcmd(modcmd_t cmd, void *data)
3884 {
3885 int error;
3886
3887 error = 0;
3888 switch (cmd) {
3889 case MODULE_CMD_INIT:
3890 error = raid_modcmd_init();
3891 break;
3892 case MODULE_CMD_FINI:
3893 error = raid_modcmd_fini();
3894 break;
3895 default:
3896 error = ENOTTY;
3897 break;
3898 }
3899 return error;
3900 }
3901
3902 static int
3903 raid_modcmd_init(void)
3904 {
3905 int error;
3906 int bmajor, cmajor;
3907
3908 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
3909 mutex_enter(&raid_lock);
3910 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3911 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
3912 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
3913 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
3914
3915 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
3916 #endif
3917
3918 bmajor = cmajor = -1;
3919 error = devsw_attach("raid", &raid_bdevsw, &bmajor,
3920 &raid_cdevsw, &cmajor);
3921 if (error != 0 && error != EEXIST) {
3922 aprint_error("%s: devsw_attach failed %d\n", __func__, error);
3923 mutex_exit(&raid_lock);
3924 return error;
3925 }
3926 #ifdef _MODULE
3927 error = config_cfdriver_attach(&raid_cd);
3928 if (error != 0) {
3929 aprint_error("%s: config_cfdriver_attach failed %d\n",
3930 __func__, error);
3931 devsw_detach(&raid_bdevsw, &raid_cdevsw);
3932 mutex_exit(&raid_lock);
3933 return error;
3934 }
3935 #endif
3936 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
3937 if (error != 0) {
3938 aprint_error("%s: config_cfattach_attach failed %d\n",
3939 __func__, error);
3940 #ifdef _MODULE
3941 config_cfdriver_detach(&raid_cd);
3942 #endif
3943 devsw_detach(&raid_bdevsw, &raid_cdevsw);
3944 mutex_exit(&raid_lock);
3945 return error;
3946 }
3947
3948 raidautoconfigdone = false;
3949
3950 mutex_exit(&raid_lock);
3951
3952 if (error == 0) {
3953 if (rf_BootRaidframe(true) == 0)
3954 aprint_verbose("Kernelized RAIDframe activated\n");
3955 else
3956 panic("Serious error activating RAID!!");
3957 }
3958
3959 /*
3960 * Register a finalizer which will be used to auto-config RAID
3961 * sets once all real hardware devices have been found.
3962 */
3963 error = config_finalize_register(NULL, rf_autoconfig);
3964 if (error != 0) {
3965 aprint_error("WARNING: unable to register RAIDframe "
3966 "finalizer\n");
3967 error = 0;
3968 }
3969
3970 return error;
3971 }
3972
3973 static int
3974 raid_modcmd_fini(void)
3975 {
3976 int error;
3977
3978 mutex_enter(&raid_lock);
3979
3980 /* Don't allow unload if raid device(s) exist. */
3981 if (!LIST_EMPTY(&raids)) {
3982 mutex_exit(&raid_lock);
3983 return EBUSY;
3984 }
3985
3986 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
3987 if (error != 0) {
3988 aprint_error("%s: cannot detach cfattach\n",__func__);
3989 mutex_exit(&raid_lock);
3990 return error;
3991 }
3992 #ifdef _MODULE
3993 error = config_cfdriver_detach(&raid_cd);
3994 if (error != 0) {
3995 aprint_error("%s: cannot detach cfdriver\n",__func__);
3996 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
3997 mutex_exit(&raid_lock);
3998 return error;
3999 }
4000 #endif
4001 error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
4002 if (error != 0) {
4003 aprint_error("%s: cannot detach devsw\n",__func__);
4004 #ifdef _MODULE
4005 config_cfdriver_attach(&raid_cd);
4006 #endif
4007 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4008 mutex_exit(&raid_lock);
4009 return error;
4010 }
4011 rf_BootRaidframe(false);
4012 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
4013 rf_destroy_mutex2(rf_sparet_wait_mutex);
4014 rf_destroy_cond2(rf_sparet_wait_cv);
4015 rf_destroy_cond2(rf_sparet_resp_cv);
4016 #endif
4017 mutex_exit(&raid_lock);
4018 mutex_destroy(&raid_lock);
4019
4020 return error;
4021 }
4022