rf_netbsdkintf.c revision 1.360 1 /* $NetBSD: rf_netbsdkintf.c,v 1.360 2019/01/29 09:28:50 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.360 2019/01/29 09:28:50 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131 #include <sys/compat_stub.h>
132
133 #include <prop/proplib.h>
134
135 #include <dev/raidframe/raidframevar.h>
136 #include <dev/raidframe/raidframeio.h>
137 #include <dev/raidframe/rf_paritymap.h>
138
139 #include "rf_raid.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_desc.h"
144 #include "rf_diskqueue.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_threadstuff.h"
152
153 #include "rf_compat50.h"
154
155 #include "rf_compat80.h"
156
157 #ifdef COMPAT_NETBSD32
158 #include "rf_compat32.h"
159 #endif
160
161 #include "ioconf.h"
162
163 #ifdef DEBUG
164 int rf_kdebug_level = 0;
165 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
166 #else /* DEBUG */
167 #define db1_printf(a) { }
168 #endif /* DEBUG */
169
170 #ifdef DEBUG_ROOT
171 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
172 #else
173 #define DPRINTF(a, ...)
174 #endif
175
176 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
177 static rf_declare_mutex2(rf_sparet_wait_mutex);
178 static rf_declare_cond2(rf_sparet_wait_cv);
179 static rf_declare_cond2(rf_sparet_resp_cv);
180
181 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
182 * spare table */
183 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
184 * installation process */
185 #endif
186
187 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
188
189 /* prototypes */
190 static void KernelWakeupFunc(struct buf *);
191 static void InitBP(struct buf *, struct vnode *, unsigned,
192 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
193 void *, int, struct proc *);
194 struct raid_softc;
195 static void raidinit(struct raid_softc *);
196 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
197 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
198
199 static int raid_match(device_t, cfdata_t, void *);
200 static void raid_attach(device_t, device_t, void *);
201 static int raid_detach(device_t, int);
202
203 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
204 daddr_t, daddr_t);
205 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
206 daddr_t, daddr_t, int);
207
208 static int raidwrite_component_label(unsigned,
209 dev_t, struct vnode *, RF_ComponentLabel_t *);
210 static int raidread_component_label(unsigned,
211 dev_t, struct vnode *, RF_ComponentLabel_t *);
212
213 static int raid_diskstart(device_t, struct buf *bp);
214 static int raid_dumpblocks(device_t, void *, daddr_t, int);
215 static int raid_lastclose(device_t);
216
217 static dev_type_open(raidopen);
218 static dev_type_close(raidclose);
219 static dev_type_read(raidread);
220 static dev_type_write(raidwrite);
221 static dev_type_ioctl(raidioctl);
222 static dev_type_strategy(raidstrategy);
223 static dev_type_dump(raiddump);
224 static dev_type_size(raidsize);
225
/* Block-device switch: entry points for the raidN block device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
236
/* Character-device switch: entry points for the raw rraidN device. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
251
/* Callbacks handed to the generic dk(9) disk framework. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
261
/* Per-unit software state for one RAIDframe device. */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic dk(9) disk state */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global 'raids' list */
};
274 /* sc_flags */
275 #define RAIDF_INITED 0x01 /* unit has been initialized */
276 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
277 #define RAIDF_DETACH 0x04 /* detach after final close */
278 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
279 #define RAIDF_LOCKED 0x10 /* unit is locked */
280 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
281
282 #define raidunit(x) DISKUNIT(x)
283 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
284
285 extern struct cfdriver raid_cd;
286 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
287 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
288 DVF_DETACH_SHUTDOWN);
289
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;	/* column (component) being reconstructed */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void *raidPtr;		/* RF_Raid_t * of the array to operate on */
};
296
297 /*
298 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
299 * Be aware that large numbers can allow the driver to consume a lot of
300 * kernel memory, especially on writes, and in degraded mode reads.
301 *
302 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
303 * a single 64K write will typically require 64K for the old data,
304 * 64K for the old parity, and 64K for the new parity, for a total
305 * of 192K (if the parity buffer is not re-used immediately).
306 * Even it if is used immediately, that's still 128K, which when multiplied
307 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
308 *
309 * Now in degraded mode, for example, a 64K read on the above setup may
310 * require data reconstruction, which will require *all* of the 4 remaining
311 * disks to participate -- 4 * 32K/disk == 128K again.
312 */
313
314 #ifndef RAIDOUTSTANDING
315 #define RAIDOUTSTANDING 6
316 #endif
317
318 #define RAIDLABELDEV(dev) \
319 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
320
321 /* declared here, and made public, for the benefit of KVM stuff.. */
322
323 static int raidlock(struct raid_softc *);
324 static void raidunlock(struct raid_softc *);
325
326 static int raid_detach_unlocked(struct raid_softc *);
327
328 static void rf_markalldirty(RF_Raid_t *);
329 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
330
331 void rf_ReconThread(struct rf_recon_req_internal *);
332 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
333 void rf_CopybackThread(RF_Raid_t *raidPtr);
334 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
335 int rf_autoconfig(device_t);
336 void rf_buildroothack(RF_ConfigSet_t *);
337
338 RF_AutoConfig_t *rf_find_raid_components(void);
339 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
340 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
341 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
342 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
343 int rf_set_autoconfig(RF_Raid_t *, int);
344 int rf_set_rootpartition(RF_Raid_t *, int);
345 void rf_release_all_vps(RF_ConfigSet_t *);
346 void rf_cleanup_config_set(RF_ConfigSet_t *);
347 int rf_have_enough_components(RF_ConfigSet_t *);
348 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
349 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
350
351 /*
352 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
353 * Note that this is overridden by having RAID_AUTOCONFIG as an option
354 * in the kernel config file.
355 */
356 #ifdef RAID_AUTOCONFIG
357 int raidautoconfig = 1;
358 #else
359 int raidautoconfig = 0;
360 #endif
361 static bool raidautoconfigdone = false;
362
363 struct RF_Pools_s rf_pools;
364
365 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
366 static kmutex_t raid_lock;
367
368 static struct raid_softc *
369 raidcreate(int unit) {
370 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
371 sc->sc_unit = unit;
372 cv_init(&sc->sc_cv, "raidunit");
373 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
374 return sc;
375 }
376
377 static void
378 raiddestroy(struct raid_softc *sc) {
379 cv_destroy(&sc->sc_cv);
380 mutex_destroy(&sc->sc_mutex);
381 kmem_free(sc, sizeof(*sc));
382 }
383
384 static struct raid_softc *
385 raidget(int unit, bool create) {
386 struct raid_softc *sc;
387 if (unit < 0) {
388 #ifdef DIAGNOSTIC
389 panic("%s: unit %d!", __func__, unit);
390 #endif
391 return NULL;
392 }
393 mutex_enter(&raid_lock);
394 LIST_FOREACH(sc, &raids, sc_link) {
395 if (sc->sc_unit == unit) {
396 mutex_exit(&raid_lock);
397 return sc;
398 }
399 }
400 mutex_exit(&raid_lock);
401 if (!create)
402 return NULL;
403 if ((sc = raidcreate(unit)) == NULL)
404 return NULL;
405 mutex_enter(&raid_lock);
406 LIST_INSERT_HEAD(&raids, sc, sc_link);
407 mutex_exit(&raid_lock);
408 return sc;
409 }
410
411 static void
412 raidput(struct raid_softc *sc) {
413 mutex_enter(&raid_lock);
414 LIST_REMOVE(sc, sc_link);
415 mutex_exit(&raid_lock);
416 raiddestroy(sc);
417 }
418
void
raidattach(int num)
{
	/*
	 * Intentionally empty: device attachment and all associated
	 * initialization happen during module initialization instead.
	 */
}
428
429 int
430 rf_autoconfig(device_t self)
431 {
432 RF_AutoConfig_t *ac_list;
433 RF_ConfigSet_t *config_sets;
434
435 if (!raidautoconfig || raidautoconfigdone == true)
436 return (0);
437
438 /* XXX This code can only be run once. */
439 raidautoconfigdone = true;
440
441 #ifdef __HAVE_CPU_BOOTCONF
442 /*
443 * 0. find the boot device if needed first so we can use it later
444 * this needs to be done before we autoconfigure any raid sets,
445 * because if we use wedges we are not going to be able to open
446 * the boot device later
447 */
448 if (booted_device == NULL)
449 cpu_bootconf();
450 #endif
451 /* 1. locate all RAID components on the system */
452 aprint_debug("Searching for RAID components...\n");
453 ac_list = rf_find_raid_components();
454
455 /* 2. Sort them into their respective sets. */
456 config_sets = rf_create_auto_sets(ac_list);
457
458 /*
459 * 3. Evaluate each set and configure the valid ones.
460 * This gets done in rf_buildroothack().
461 */
462 rf_buildroothack(config_sets);
463
464 return 1;
465 }
466
467 static int
468 rf_containsboot(RF_Raid_t *r, device_t bdv) {
469 const char *bootname;
470 size_t len;
471
472 /* if bdv is NULL, the set can't contain it. exit early. */
473 if (bdv == NULL)
474 return 0;
475
476 bootname = device_xname(bdv);
477 len = strlen(bootname);
478
479 for (int col = 0; col < r->numCol; col++) {
480 const char *devname = r->Disks[col].devname;
481 devname += sizeof("/dev/") - 1;
482 if (strncmp(devname, "dk", 2) == 0) {
483 const char *parent =
484 dkwedge_get_parent_name(r->Disks[col].dev);
485 if (parent != NULL)
486 devname = parent;
487 }
488 if (strncmp(devname, bootname, len) == 0) {
489 struct raid_softc *sc = r->softc;
490 aprint_debug("raid%d includes boot device %s\n",
491 sc->sc_unit, devname);
492 return 1;
493 }
494 }
495 return 0;
496 }
497
498 void
499 rf_buildroothack(RF_ConfigSet_t *config_sets)
500 {
501 RF_ConfigSet_t *cset;
502 RF_ConfigSet_t *next_cset;
503 int num_root;
504 struct raid_softc *sc, *rsc;
505 struct dk_softc *dksc;
506
507 sc = rsc = NULL;
508 num_root = 0;
509 cset = config_sets;
510 while (cset != NULL) {
511 next_cset = cset->next;
512 if (rf_have_enough_components(cset) &&
513 cset->ac->clabel->autoconfigure == 1) {
514 sc = rf_auto_config_set(cset);
515 if (sc != NULL) {
516 aprint_debug("raid%d: configured ok, rootable %d\n",
517 sc->sc_unit, cset->rootable);
518 if (cset->rootable) {
519 rsc = sc;
520 num_root++;
521 }
522 } else {
523 /* The autoconfig didn't work :( */
524 aprint_debug("Autoconfig failed\n");
525 rf_release_all_vps(cset);
526 }
527 } else {
528 /* we're not autoconfiguring this set...
529 release the associated resources */
530 rf_release_all_vps(cset);
531 }
532 /* cleanup */
533 rf_cleanup_config_set(cset);
534 cset = next_cset;
535 }
536 dksc = &rsc->sc_dksc;
537
538 /* if the user has specified what the root device should be
539 then we don't touch booted_device or boothowto... */
540
541 if (rootspec != NULL) {
542 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
543 return;
544 }
545
546 /* we found something bootable... */
547
548 /*
549 * XXX: The following code assumes that the root raid
550 * is the first ('a') partition. This is about the best
551 * we can do with a BSD disklabel, but we might be able
552 * to do better with a GPT label, by setting a specified
553 * attribute to indicate the root partition. We can then
554 * stash the partition number in the r->root_partition
555 * high bits (the bottom 2 bits are already used). For
556 * now we just set booted_partition to 0 when we override
557 * root.
558 */
559 if (num_root == 1) {
560 device_t candidate_root;
561 if (dksc->sc_dkdev.dk_nwedges != 0) {
562 char cname[sizeof(cset->ac->devname)];
563 /* XXX: assume partition 'a' first */
564 snprintf(cname, sizeof(cname), "%s%c",
565 device_xname(dksc->sc_dev), 'a');
566 candidate_root = dkwedge_find_by_wname(cname);
567 DPRINTF("%s: candidate wedge root=%s\n", __func__,
568 cname);
569 if (candidate_root == NULL) {
570 /*
571 * If that is not found, because we don't use
572 * disklabel, return the first dk child
573 * XXX: we can skip the 'a' check above
574 * and always do this...
575 */
576 size_t i = 0;
577 candidate_root = dkwedge_find_by_parent(
578 device_xname(dksc->sc_dev), &i);
579 }
580 DPRINTF("%s: candidate wedge root=%p\n", __func__,
581 candidate_root);
582 } else
583 candidate_root = dksc->sc_dev;
584 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
585 DPRINTF("%s: booted_device=%p root_partition=%d "
586 "contains_boot=%d",
587 __func__, booted_device, rsc->sc_r.root_partition,
588 rf_containsboot(&rsc->sc_r, booted_device));
589 /* XXX the check for booted_device == NULL can probably be
590 * dropped, now that rf_containsboot handles that case.
591 */
592 if (booted_device == NULL ||
593 rsc->sc_r.root_partition == 1 ||
594 rf_containsboot(&rsc->sc_r, booted_device)) {
595 booted_device = candidate_root;
596 booted_method = "raidframe/single";
597 booted_partition = 0; /* XXX assume 'a' */
598 }
599 } else if (num_root > 1) {
600 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
601 booted_device);
602
603 /*
604 * Maybe the MD code can help. If it cannot, then
605 * setroot() will discover that we have no
606 * booted_device and will ask the user if nothing was
607 * hardwired in the kernel config file
608 */
609 if (booted_device == NULL)
610 return;
611
612 num_root = 0;
613 mutex_enter(&raid_lock);
614 LIST_FOREACH(sc, &raids, sc_link) {
615 RF_Raid_t *r = &sc->sc_r;
616 if (r->valid == 0)
617 continue;
618
619 if (r->root_partition == 0)
620 continue;
621
622 if (rf_containsboot(r, booted_device)) {
623 num_root++;
624 rsc = sc;
625 dksc = &rsc->sc_dksc;
626 }
627 }
628 mutex_exit(&raid_lock);
629
630 if (num_root == 1) {
631 booted_device = dksc->sc_dev;
632 booted_method = "raidframe/multi";
633 booted_partition = 0; /* XXX assume 'a' */
634 } else {
635 /* we can't guess.. require the user to answer... */
636 boothowto |= RB_ASKNAME;
637 }
638 }
639 }
640
641 static int
642 raidsize(dev_t dev)
643 {
644 struct raid_softc *rs;
645 struct dk_softc *dksc;
646 unsigned int unit;
647
648 unit = raidunit(dev);
649 if ((rs = raidget(unit, false)) == NULL)
650 return -1;
651 dksc = &rs->sc_dksc;
652
653 if ((rs->sc_flags & RAIDF_INITED) == 0)
654 return -1;
655
656 return dk_size(dksc, dev);
657 }
658
659 static int
660 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
661 {
662 unsigned int unit;
663 struct raid_softc *rs;
664 struct dk_softc *dksc;
665
666 unit = raidunit(dev);
667 if ((rs = raidget(unit, false)) == NULL)
668 return ENXIO;
669 dksc = &rs->sc_dksc;
670
671 if ((rs->sc_flags & RAIDF_INITED) == 0)
672 return ENODEV;
673
674 /*
675 Note that blkno is relative to this particular partition.
676 By adding adding RF_PROTECTED_SECTORS, we get a value that
677 is relative to the partition used for the underlying component.
678 */
679 blkno += RF_PROTECTED_SECTORS;
680
681 return dk_dump(dksc, dev, blkno, va, size);
682 }
683
/*
 * Write 'nblk' blocks of crash-dump data from 'va' at 'blkno' to a
 * live component of the set.  Only RAID 1 sets (1 data + 1 parity
 * column) are supported.  Returns EINVAL when the layout is wrong or
 * no live component exists, ENXIO when the chosen component has no
 * block-device switch, otherwise the component driver's d_dump status.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	/* First pass: any optimal (live) component wins outright. */
	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare stands in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Forward the dump to the chosen component's own driver. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
789
790 /* ARGSUSED */
791 static int
792 raidopen(dev_t dev, int flags, int fmt,
793 struct lwp *l)
794 {
795 int unit = raidunit(dev);
796 struct raid_softc *rs;
797 struct dk_softc *dksc;
798 int error = 0;
799 int part, pmask;
800
801 if ((rs = raidget(unit, true)) == NULL)
802 return ENXIO;
803 if ((error = raidlock(rs)) != 0)
804 return (error);
805
806 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
807 error = EBUSY;
808 goto bad;
809 }
810
811 dksc = &rs->sc_dksc;
812
813 part = DISKPART(dev);
814 pmask = (1 << part);
815
816 if (!DK_BUSY(dksc, pmask) &&
817 ((rs->sc_flags & RAIDF_INITED) != 0)) {
818 /* First one... mark things as dirty... Note that we *MUST*
819 have done a configure before this. I DO NOT WANT TO BE
820 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
821 THAT THEY BELONG TOGETHER!!!!! */
822 /* XXX should check to see if we're only open for reading
823 here... If so, we needn't do this, but then need some
824 other way of keeping track of what's happened.. */
825
826 rf_markalldirty(&rs->sc_r);
827 }
828
829 if ((rs->sc_flags & RAIDF_INITED) != 0)
830 error = dk_open(dksc, dev, flags, fmt, l);
831
832 bad:
833 raidunlock(rs);
834
835 return (error);
836
837
838 }
839
840 static int
841 raid_lastclose(device_t self)
842 {
843 struct raid_softc *rs = raidsoftc(self);
844
845 /* Last one... device is not unconfigured yet.
846 Device shutdown has taken care of setting the
847 clean bits if RAIDF_INITED is not set
848 mark things as clean... */
849
850 rf_update_component_labels(&rs->sc_r,
851 RF_FINAL_COMPONENT_UPDATE);
852
853 /* pass to unlocked code */
854 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
855 rs->sc_flags |= RAIDF_DETACH;
856
857 return 0;
858 }
859
860 /* ARGSUSED */
861 static int
862 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
863 {
864 int unit = raidunit(dev);
865 struct raid_softc *rs;
866 struct dk_softc *dksc;
867 cfdata_t cf;
868 int error = 0, do_detach = 0, do_put = 0;
869
870 if ((rs = raidget(unit, false)) == NULL)
871 return ENXIO;
872 dksc = &rs->sc_dksc;
873
874 if ((error = raidlock(rs)) != 0)
875 return (error);
876
877 if ((rs->sc_flags & RAIDF_INITED) != 0) {
878 error = dk_close(dksc, dev, flags, fmt, l);
879 if ((rs->sc_flags & RAIDF_DETACH) != 0)
880 do_detach = 1;
881 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
882 do_put = 1;
883
884 raidunlock(rs);
885
886 if (do_detach) {
887 /* free the pseudo device attach bits */
888 cf = device_cfdata(dksc->sc_dev);
889 error = config_detach(dksc->sc_dev, 0);
890 if (error == 0)
891 free(cf, M_RAIDFRAME);
892 } else if (do_put) {
893 raidput(rs);
894 }
895
896 return (error);
897
898 }
899
/*
 * Wake anyone sleeping on the array's iodone_cv so that queued I/O
 * gets picked up.  The signal is issued under iodone_lock.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
907
908 static void
909 raidstrategy(struct buf *bp)
910 {
911 unsigned int unit;
912 struct raid_softc *rs;
913 struct dk_softc *dksc;
914 RF_Raid_t *raidPtr;
915
916 unit = raidunit(bp->b_dev);
917 if ((rs = raidget(unit, false)) == NULL) {
918 bp->b_error = ENXIO;
919 goto fail;
920 }
921 if ((rs->sc_flags & RAIDF_INITED) == 0) {
922 bp->b_error = ENXIO;
923 goto fail;
924 }
925 dksc = &rs->sc_dksc;
926 raidPtr = &rs->sc_r;
927
928 /* Queue IO only */
929 if (dk_strategy_defer(dksc, bp))
930 goto done;
931
932 /* schedule the IO to happen at the next convenient time */
933 raid_wakeup(raidPtr);
934
935 done:
936 return;
937
938 fail:
939 bp->b_resid = bp->b_bcount;
940 biodone(bp);
941 }
942
943 static int
944 raid_diskstart(device_t dev, struct buf *bp)
945 {
946 struct raid_softc *rs = raidsoftc(dev);
947 RF_Raid_t *raidPtr;
948
949 raidPtr = &rs->sc_r;
950 if (!raidPtr->valid) {
951 db1_printf(("raid is not valid..\n"));
952 return ENODEV;
953 }
954
955 /* XXX */
956 bp->b_resid = 0;
957
958 return raiddoaccess(raidPtr, bp);
959 }
960
961 void
962 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
963 {
964 struct raid_softc *rs;
965 struct dk_softc *dksc;
966
967 rs = raidPtr->softc;
968 dksc = &rs->sc_dksc;
969
970 dk_done(dksc, bp);
971
972 rf_lock_mutex2(raidPtr->mutex);
973 raidPtr->openings++;
974 rf_unlock_mutex2(raidPtr->mutex);
975
976 /* schedule more IO */
977 raid_wakeup(raidPtr);
978 }
979
980 /* ARGSUSED */
981 static int
982 raidread(dev_t dev, struct uio *uio, int flags)
983 {
984 int unit = raidunit(dev);
985 struct raid_softc *rs;
986
987 if ((rs = raidget(unit, false)) == NULL)
988 return ENXIO;
989
990 if ((rs->sc_flags & RAIDF_INITED) == 0)
991 return (ENXIO);
992
993 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
994
995 }
996
997 /* ARGSUSED */
998 static int
999 raidwrite(dev_t dev, struct uio *uio, int flags)
1000 {
1001 int unit = raidunit(dev);
1002 struct raid_softc *rs;
1003
1004 if ((rs = raidget(unit, false)) == NULL)
1005 return ENXIO;
1006
1007 if ((rs->sc_flags & RAIDF_INITED) == 0)
1008 return (ENXIO);
1009
1010 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1011
1012 }
1013
/*
 * Tear down a configured RAID set.  Caller must hold the unit lock
 * (raidlock).  Fails with EBUSY while the unit is open or while
 * reconstruction, parity rewrite or copyback is in progress; returns
 * 0 on success (or when the unit was never initialized), otherwise
 * the error from rf_Shutdown().  The teardown order below (drain,
 * free queue, then disk detach) is deliberate.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse to detach while the unit is busy or being rebuilt. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/*
	 * NOTE(review): SHUTDOWN is cleared before the shutdown is
	 * attempted — presumably so a failed rf_Shutdown() leaves the
	 * unit in a usable, non-shutting-down state; confirm intent.
	 */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1051
1052 static int
1053 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1054 {
1055 int unit = raidunit(dev);
1056 int error = 0;
1057 int part, pmask;
1058 struct raid_softc *rs;
1059 struct dk_softc *dksc;
1060 RF_Config_t *k_cfg, *u_cfg;
1061 RF_Raid_t *raidPtr;
1062 RF_RaidDisk_t *diskPtr;
1063 RF_AccTotals_t *totals;
1064 RF_DeviceConfig_t *d_cfg, *ucfgp;
1065 u_char *specific_buf;
1066 int retcode = 0;
1067 int column;
1068 /* int raidid; */
1069 struct rf_recon_req *rr;
1070 struct rf_recon_req_internal *rrint;
1071 RF_ComponentLabel_t *clabel;
1072 RF_ComponentLabel_t *ci_label;
1073 RF_SingleComponent_t *sparePtr,*componentPtr;
1074 RF_SingleComponent_t component;
1075 int d;
1076
1077 if ((rs = raidget(unit, false)) == NULL)
1078 return ENXIO;
1079 dksc = &rs->sc_dksc;
1080 raidPtr = &rs->sc_r;
1081
1082 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1083 (int) DISKPART(dev), (int) unit, cmd));
1084
1085 /* Must be initialized for these... */
1086 switch (cmd) {
1087 case RAIDFRAME_REWRITEPARITY:
1088 case RAIDFRAME_GET_INFO:
1089 case RAIDFRAME_RESET_ACCTOTALS:
1090 case RAIDFRAME_GET_ACCTOTALS:
1091 case RAIDFRAME_KEEP_ACCTOTALS:
1092 case RAIDFRAME_GET_SIZE:
1093 case RAIDFRAME_FAIL_DISK:
1094 case RAIDFRAME_COPYBACK:
1095 case RAIDFRAME_CHECK_RECON_STATUS:
1096 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1097 case RAIDFRAME_GET_COMPONENT_LABEL:
1098 case RAIDFRAME_SET_COMPONENT_LABEL:
1099 case RAIDFRAME_ADD_HOT_SPARE:
1100 case RAIDFRAME_REMOVE_HOT_SPARE:
1101 case RAIDFRAME_INIT_LABELS:
1102 case RAIDFRAME_REBUILD_IN_PLACE:
1103 case RAIDFRAME_CHECK_PARITY:
1104 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1105 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1106 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1107 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1108 case RAIDFRAME_SET_AUTOCONFIG:
1109 case RAIDFRAME_SET_ROOT:
1110 case RAIDFRAME_DELETE_COMPONENT:
1111 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1112 case RAIDFRAME_PARITYMAP_STATUS:
1113 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1114 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1115 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1116 #ifdef COMPAT_NETBSD32
1117 #ifdef _LP64
1118 case RAIDFRAME_GET_INFO32:
1119 #endif
1120 #endif
1121 if ((rs->sc_flags & RAIDF_INITED) == 0)
1122 return (ENXIO);
1123 }
1124
1125 /*
1126 * Handle compat ioctl calls
1127 *
1128 * * If compat code is not loaded, stub returns ENOSYS and we just
1129 * check the "native" cmd's
1130 * * If compat code is loaded but does not recognize the cmd, it
1131 * returns EPASSTHROUGH, and we just check the "native" cmd's
1132 * * If compat code returns EAGAIN, we need to finish via config
1133 * * Otherwise the cmd has been handled and we just return
1134 */
1135 MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
1136 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1137 enosys(), retcode);
1138 if (retcode == ENOSYS)
1139 retcode = 0;
1140 else if (retcode == EAGAIN)
1141 goto config;
1142 else if (retcode != EPASSTHROUGH)
1143 return retcode;
1144
1145 MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
1146 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1147 enosys(), retcode);
1148 if (retcode == ENOSYS)
1149 retcode = 0;
1150 else if (retcode == EAGAIN)
1151 goto config;
1152 else if (retcode != EPASSTHROUGH)
1153 return retcode;
1154
1155 /*
1156 * XXX
1157 * Handling of FAIL_DISK80 command requires us to retain retcode's
1158 * value of EPASSTHROUGH. If you add more compat code later, make
1159 * sure you don't overwrite retcode and break this!
1160 */
1161
1162 switch (cmd) {
1163
1164 /* configure the system */
1165 case RAIDFRAME_CONFIGURE:
1166 #ifdef COMPAT_NETBSD32
1167 #ifdef _LP64
1168 case RAIDFRAME_CONFIGURE32:
1169 #endif
1170 #endif
1171
1172 if (raidPtr->valid) {
1173 /* There is a valid RAID set running on this unit! */
1174 printf("raid%d: Device already configured!\n",unit);
1175 return(EINVAL);
1176 }
1177
1178 /* copy-in the configuration information */
1179 /* data points to a pointer to the configuration structure */
1180
1181 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1182 if (k_cfg == NULL) {
1183 return (ENOMEM);
1184 }
1185 #ifdef COMPAT_NETBSD32
1186 #ifdef _LP64
1187 if (cmd == RAIDFRAME_CONFIGURE32 &&
1188 (l->l_proc->p_flag & PK_32) != 0)
1189 retcode = rf_config_netbsd32(data, k_cfg);
1190 else
1191 #endif
1192 #endif
1193 {
1194 u_cfg = *((RF_Config_t **) data);
1195 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1196 }
1197 if (retcode) {
1198 RF_Free(k_cfg, sizeof(RF_Config_t));
1199 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1200 retcode));
1201 goto no_config;
1202 }
1203 goto config;
1204 config:
1205 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1206
1207 /* allocate a buffer for the layout-specific data, and copy it
1208 * in */
1209 if (k_cfg->layoutSpecificSize) {
1210 if (k_cfg->layoutSpecificSize > 10000) {
1211 /* sanity check */
1212 RF_Free(k_cfg, sizeof(RF_Config_t));
1213 retcode = EINVAL;
1214 goto no_config;
1215 }
1216 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1217 (u_char *));
1218 if (specific_buf == NULL) {
1219 RF_Free(k_cfg, sizeof(RF_Config_t));
1220 retcode = ENOMEM;
1221 goto no_config;
1222 }
1223 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1224 k_cfg->layoutSpecificSize);
1225 if (retcode) {
1226 RF_Free(k_cfg, sizeof(RF_Config_t));
1227 RF_Free(specific_buf,
1228 k_cfg->layoutSpecificSize);
1229 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1230 retcode));
1231 goto no_config;
1232 }
1233 } else
1234 specific_buf = NULL;
1235 k_cfg->layoutSpecific = specific_buf;
1236
1237 /* should do some kind of sanity check on the configuration.
1238 * Store the sum of all the bytes in the last byte? */
1239
1240 /* configure the system */
1241
1242 /*
1243 * Clear the entire RAID descriptor, just to make sure
1244 * there is no stale data left in the case of a
1245 * reconfiguration
1246 */
1247 memset(raidPtr, 0, sizeof(*raidPtr));
1248 raidPtr->softc = rs;
1249 raidPtr->raidid = unit;
1250
1251 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1252
1253 if (retcode == 0) {
1254
1255 /* allow this many simultaneous IO's to
1256 this RAID device */
1257 raidPtr->openings = RAIDOUTSTANDING;
1258
1259 raidinit(rs);
1260 raid_wakeup(raidPtr);
1261 rf_markalldirty(raidPtr);
1262 }
1263 /* free the buffers. No return code here. */
1264 if (k_cfg->layoutSpecificSize) {
1265 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1266 }
1267 RF_Free(k_cfg, sizeof(RF_Config_t));
1268
1269 no_config:
1270 /*
1271 * If configuration failed, set sc_flags so that we
1272 * will detach the device when we close it.
1273 */
1274 if (retcode != 0)
1275 rs->sc_flags |= RAIDF_SHUTDOWN;
1276 return (retcode);
1277
1278 /* shutdown the system */
1279 case RAIDFRAME_SHUTDOWN:
1280
1281 part = DISKPART(dev);
1282 pmask = (1 << part);
1283
1284 if ((error = raidlock(rs)) != 0)
1285 return (error);
1286
1287 if (DK_BUSY(dksc, pmask) ||
1288 raidPtr->recon_in_progress != 0 ||
1289 raidPtr->parity_rewrite_in_progress != 0 ||
1290 raidPtr->copyback_in_progress != 0)
1291 retcode = EBUSY;
1292 else {
1293 /* detach and free on close */
1294 rs->sc_flags |= RAIDF_SHUTDOWN;
1295 retcode = 0;
1296 }
1297
1298 raidunlock(rs);
1299
1300 return (retcode);
1301 case RAIDFRAME_GET_COMPONENT_LABEL:
1302 return rf_get_component_label(raidPtr, data);
1303
1304 #if 0
1305 case RAIDFRAME_SET_COMPONENT_LABEL:
1306 clabel = (RF_ComponentLabel_t *) data;
1307
1308 /* XXX check the label for valid stuff... */
1309 /* Note that some things *should not* get modified --
1310 the user should be re-initing the labels instead of
1311 trying to patch things.
1312 */
1313
1314 raidid = raidPtr->raidid;
1315 #ifdef DEBUG
1316 printf("raid%d: Got component label:\n", raidid);
1317 printf("raid%d: Version: %d\n", raidid, clabel->version);
1318 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1319 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1320 printf("raid%d: Column: %d\n", raidid, clabel->column);
1321 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1322 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1323 printf("raid%d: Status: %d\n", raidid, clabel->status);
1324 #endif
1325 clabel->row = 0;
1326 column = clabel->column;
1327
1328 if ((column < 0) || (column >= raidPtr->numCol)) {
1329 return(EINVAL);
1330 }
1331
1332 /* XXX this isn't allowed to do anything for now :-) */
1333
1334 /* XXX and before it is, we need to fill in the rest
1335 of the fields!?!?!?! */
1336 memcpy(raidget_component_label(raidPtr, column),
1337 clabel, sizeof(*clabel));
1338 raidflush_component_label(raidPtr, column);
1339 return (0);
1340 #endif
1341
1342 case RAIDFRAME_INIT_LABELS:
1343 clabel = (RF_ComponentLabel_t *) data;
1344 /*
1345 we only want the serial number from
1346 the above. We get all the rest of the information
1347 from the config that was used to create this RAID
1348 set.
1349 */
1350
1351 raidPtr->serial_number = clabel->serial_number;
1352
1353 for(column=0;column<raidPtr->numCol;column++) {
1354 diskPtr = &raidPtr->Disks[column];
1355 if (!RF_DEAD_DISK(diskPtr->status)) {
1356 ci_label = raidget_component_label(raidPtr,
1357 column);
1358 /* Zeroing this is important. */
1359 memset(ci_label, 0, sizeof(*ci_label));
1360 raid_init_component_label(raidPtr, ci_label);
1361 ci_label->serial_number =
1362 raidPtr->serial_number;
1363 ci_label->row = 0; /* we dont' pretend to support more */
1364 rf_component_label_set_partitionsize(ci_label,
1365 diskPtr->partitionSize);
1366 ci_label->column = column;
1367 raidflush_component_label(raidPtr, column);
1368 }
1369 /* XXXjld what about the spares? */
1370 }
1371
1372 return (retcode);
1373 case RAIDFRAME_SET_AUTOCONFIG:
1374 d = rf_set_autoconfig(raidPtr, *(int *) data);
1375 printf("raid%d: New autoconfig value is: %d\n",
1376 raidPtr->raidid, d);
1377 *(int *) data = d;
1378 return (retcode);
1379
1380 case RAIDFRAME_SET_ROOT:
1381 d = rf_set_rootpartition(raidPtr, *(int *) data);
1382 printf("raid%d: New rootpartition value is: %d\n",
1383 raidPtr->raidid, d);
1384 *(int *) data = d;
1385 return (retcode);
1386
1387 /* initialize all parity */
1388 case RAIDFRAME_REWRITEPARITY:
1389
1390 if (raidPtr->Layout.map->faultsTolerated == 0) {
1391 /* Parity for RAID 0 is trivially correct */
1392 raidPtr->parity_good = RF_RAID_CLEAN;
1393 return(0);
1394 }
1395
1396 if (raidPtr->parity_rewrite_in_progress == 1) {
1397 /* Re-write is already in progress! */
1398 return(EINVAL);
1399 }
1400
1401 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1402 rf_RewriteParityThread,
1403 raidPtr,"raid_parity");
1404 return (retcode);
1405
1406
1407 case RAIDFRAME_ADD_HOT_SPARE:
1408 sparePtr = (RF_SingleComponent_t *) data;
1409 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1410 retcode = rf_add_hot_spare(raidPtr, &component);
1411 return(retcode);
1412
1413 case RAIDFRAME_REMOVE_HOT_SPARE:
1414 return(retcode);
1415
1416 case RAIDFRAME_DELETE_COMPONENT:
1417 componentPtr = (RF_SingleComponent_t *)data;
1418 memcpy( &component, componentPtr,
1419 sizeof(RF_SingleComponent_t));
1420 retcode = rf_delete_component(raidPtr, &component);
1421 return(retcode);
1422
1423 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1424 componentPtr = (RF_SingleComponent_t *)data;
1425 memcpy( &component, componentPtr,
1426 sizeof(RF_SingleComponent_t));
1427 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1428 return(retcode);
1429
1430 case RAIDFRAME_REBUILD_IN_PLACE:
1431
1432 if (raidPtr->Layout.map->faultsTolerated == 0) {
1433 /* Can't do this on a RAID 0!! */
1434 return(EINVAL);
1435 }
1436
1437 if (raidPtr->recon_in_progress == 1) {
1438 /* a reconstruct is already in progress! */
1439 return(EINVAL);
1440 }
1441
1442 componentPtr = (RF_SingleComponent_t *) data;
1443 memcpy( &component, componentPtr,
1444 sizeof(RF_SingleComponent_t));
1445 component.row = 0; /* we don't support any more */
1446 column = component.column;
1447
1448 if ((column < 0) || (column >= raidPtr->numCol)) {
1449 return(EINVAL);
1450 }
1451
1452 rf_lock_mutex2(raidPtr->mutex);
1453 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1454 (raidPtr->numFailures > 0)) {
1455 /* XXX 0 above shouldn't be constant!!! */
1456 /* some component other than this has failed.
1457 Let's not make things worse than they already
1458 are... */
1459 printf("raid%d: Unable to reconstruct to disk at:\n",
1460 raidPtr->raidid);
1461 printf("raid%d: Col: %d Too many failures.\n",
1462 raidPtr->raidid, column);
1463 rf_unlock_mutex2(raidPtr->mutex);
1464 return (EINVAL);
1465 }
1466 if (raidPtr->Disks[column].status ==
1467 rf_ds_reconstructing) {
1468 printf("raid%d: Unable to reconstruct to disk at:\n",
1469 raidPtr->raidid);
1470 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1471
1472 rf_unlock_mutex2(raidPtr->mutex);
1473 return (EINVAL);
1474 }
1475 if (raidPtr->Disks[column].status == rf_ds_spared) {
1476 rf_unlock_mutex2(raidPtr->mutex);
1477 return (EINVAL);
1478 }
1479 rf_unlock_mutex2(raidPtr->mutex);
1480
1481 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1482 if (rrint == NULL)
1483 return(ENOMEM);
1484
1485 rrint->col = column;
1486 rrint->raidPtr = raidPtr;
1487
1488 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1489 rf_ReconstructInPlaceThread,
1490 rrint, "raid_reconip");
1491 return(retcode);
1492
1493 case RAIDFRAME_GET_INFO:
1494 #ifdef COMPAT_NETBSD32
1495 #ifdef _LP64
1496 case RAIDFRAME_GET_INFO32:
1497 #endif
1498 #endif
1499 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1500 (RF_DeviceConfig_t *));
1501 if (d_cfg == NULL)
1502 return (ENOMEM);
1503 retcode = rf_get_info(raidPtr, d_cfg);
1504 if (retcode == 0) {
1505 #ifdef COMPAT_NETBSD32
1506 #ifdef _LP64
1507 if (cmd == RAIDFRAME_GET_INFO32)
1508 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1509 else
1510 #endif
1511 #endif
1512 ucfgp = *(RF_DeviceConfig_t **)data;
1513 retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1514 }
1515 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1516
1517 return (retcode);
1518
1519 case RAIDFRAME_CHECK_PARITY:
1520 *(int *) data = raidPtr->parity_good;
1521 return (0);
1522
1523 case RAIDFRAME_PARITYMAP_STATUS:
1524 if (rf_paritymap_ineligible(raidPtr))
1525 return EINVAL;
1526 rf_paritymap_status(raidPtr->parity_map,
1527 (struct rf_pmstat *)data);
1528 return 0;
1529
1530 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1531 if (rf_paritymap_ineligible(raidPtr))
1532 return EINVAL;
1533 if (raidPtr->parity_map == NULL)
1534 return ENOENT; /* ??? */
1535 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1536 (struct rf_pmparams *)data, 1))
1537 return EINVAL;
1538 return 0;
1539
1540 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1541 if (rf_paritymap_ineligible(raidPtr))
1542 return EINVAL;
1543 *(int *) data = rf_paritymap_get_disable(raidPtr);
1544 return 0;
1545
1546 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1547 if (rf_paritymap_ineligible(raidPtr))
1548 return EINVAL;
1549 rf_paritymap_set_disable(raidPtr, *(int *)data);
1550 /* XXX should errors be passed up? */
1551 return 0;
1552
1553 case RAIDFRAME_RESET_ACCTOTALS:
1554 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1555 return (0);
1556
1557 case RAIDFRAME_GET_ACCTOTALS:
1558 totals = (RF_AccTotals_t *) data;
1559 *totals = raidPtr->acc_totals;
1560 return (0);
1561
1562 case RAIDFRAME_KEEP_ACCTOTALS:
1563 raidPtr->keep_acc_totals = *(int *)data;
1564 return (0);
1565
1566 case RAIDFRAME_GET_SIZE:
1567 *(int *) data = raidPtr->totalSectors;
1568 return (0);
1569
1570 /* fail a disk & optionally start reconstruction */
1571 case RAIDFRAME_FAIL_DISK80:
1572 /* Check if we called compat code for this cmd */
1573 if (retcode != EPASSTHROUGH)
1574 return EINVAL;
1575 /* FALLTHRU */
1576 case RAIDFRAME_FAIL_DISK:
1577 if (raidPtr->Layout.map->faultsTolerated == 0) {
1578 /* Can't do this on a RAID 0!! */
1579 return(EINVAL);
1580 }
1581
1582 rr = (struct rf_recon_req *) data;
1583 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1584 return (EINVAL);
1585
1586 rf_lock_mutex2(raidPtr->mutex);
1587 if (raidPtr->status == rf_rs_reconstructing) {
1588 /* you can't fail a disk while we're reconstructing! */
1589 /* XXX wrong for RAID6 */
1590 rf_unlock_mutex2(raidPtr->mutex);
1591 return (EINVAL);
1592 }
1593 if ((raidPtr->Disks[rr->col].status ==
1594 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1595 /* some other component has failed. Let's not make
1596 things worse. XXX wrong for RAID6 */
1597 rf_unlock_mutex2(raidPtr->mutex);
1598 return (EINVAL);
1599 }
1600 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1601 /* Can't fail a spared disk! */
1602 rf_unlock_mutex2(raidPtr->mutex);
1603 return (EINVAL);
1604 }
1605 rf_unlock_mutex2(raidPtr->mutex);
1606
1607 /* make a copy of the recon request so that we don't rely on
1608 * the user's buffer */
1609 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1610 if (rrint == NULL)
1611 return(ENOMEM);
1612 rrint->col = rr->col;
1613 rrint->flags = rr->flags;
1614 rrint->raidPtr = raidPtr;
1615
1616 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1617 rf_ReconThread,
1618 rrint, "raid_recon");
1619 return (0);
1620
1621 /* invoke a copyback operation after recon on whatever disk
1622 * needs it, if any */
1623 case RAIDFRAME_COPYBACK:
1624
1625 if (raidPtr->Layout.map->faultsTolerated == 0) {
1626 /* This makes no sense on a RAID 0!! */
1627 return(EINVAL);
1628 }
1629
1630 if (raidPtr->copyback_in_progress == 1) {
1631 /* Copyback is already in progress! */
1632 return(EINVAL);
1633 }
1634
1635 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1636 rf_CopybackThread,
1637 raidPtr,"raid_copyback");
1638 return (retcode);
1639
1640 /* return the percentage completion of reconstruction */
1641 case RAIDFRAME_CHECK_RECON_STATUS:
1642 if (raidPtr->Layout.map->faultsTolerated == 0) {
1643 /* This makes no sense on a RAID 0, so tell the
1644 user it's done. */
1645 *(int *) data = 100;
1646 return(0);
1647 }
1648 if (raidPtr->status != rf_rs_reconstructing)
1649 *(int *) data = 100;
1650 else {
1651 if (raidPtr->reconControl->numRUsTotal > 0) {
1652 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1653 } else {
1654 *(int *) data = 0;
1655 }
1656 }
1657 return (0);
1658 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1659 rf_check_recon_status_ext(raidPtr, data);
1660 return (0);
1661
1662 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1663 if (raidPtr->Layout.map->faultsTolerated == 0) {
1664 /* This makes no sense on a RAID 0, so tell the
1665 user it's done. */
1666 *(int *) data = 100;
1667 return(0);
1668 }
1669 if (raidPtr->parity_rewrite_in_progress == 1) {
1670 *(int *) data = 100 *
1671 raidPtr->parity_rewrite_stripes_done /
1672 raidPtr->Layout.numStripe;
1673 } else {
1674 *(int *) data = 100;
1675 }
1676 return (0);
1677
1678 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1679 rf_check_parityrewrite_status_ext(raidPtr, data);
1680 return (0);
1681
1682 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1683 if (raidPtr->Layout.map->faultsTolerated == 0) {
1684 /* This makes no sense on a RAID 0 */
1685 *(int *) data = 100;
1686 return(0);
1687 }
1688 if (raidPtr->copyback_in_progress == 1) {
1689 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1690 raidPtr->Layout.numStripe;
1691 } else {
1692 *(int *) data = 100;
1693 }
1694 return (0);
1695
1696 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1697 rf_check_copyback_status_ext(raidPtr, data);
1698 return 0;
1699
1700 case RAIDFRAME_SET_LAST_UNIT:
1701 for (column = 0; column < raidPtr->numCol; column++)
1702 if (raidPtr->Disks[column].status != rf_ds_optimal)
1703 return EBUSY;
1704
1705 for (column = 0; column < raidPtr->numCol; column++) {
1706 clabel = raidget_component_label(raidPtr, column);
1707 clabel->last_unit = *(int *)data;
1708 raidflush_component_label(raidPtr, column);
1709 }
1710 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1711 return 0;
1712
1713 /* the sparetable daemon calls this to wait for the kernel to
1714 * need a spare table. this ioctl does not return until a
1715 * spare table is needed. XXX -- calling mpsleep here in the
1716 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1717 * -- I should either compute the spare table in the kernel,
1718 * or have a different -- XXX XXX -- interface (a different
1719 * character device) for delivering the table -- XXX */
1720 #if 0
1721 case RAIDFRAME_SPARET_WAIT:
1722 rf_lock_mutex2(rf_sparet_wait_mutex);
1723 while (!rf_sparet_wait_queue)
1724 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1725 waitreq = rf_sparet_wait_queue;
1726 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1727 rf_unlock_mutex2(rf_sparet_wait_mutex);
1728
1729 /* structure assignment */
1730 *((RF_SparetWait_t *) data) = *waitreq;
1731
1732 RF_Free(waitreq, sizeof(*waitreq));
1733 return (0);
1734
1735 /* wakes up a process waiting on SPARET_WAIT and puts an error
1736 * code in it that will cause the dameon to exit */
1737 case RAIDFRAME_ABORT_SPARET_WAIT:
1738 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1739 waitreq->fcol = -1;
1740 rf_lock_mutex2(rf_sparet_wait_mutex);
1741 waitreq->next = rf_sparet_wait_queue;
1742 rf_sparet_wait_queue = waitreq;
1743 rf_broadcast_conf2(rf_sparet_wait_cv);
1744 rf_unlock_mutex2(rf_sparet_wait_mutex);
1745 return (0);
1746
1747 /* used by the spare table daemon to deliver a spare table
1748 * into the kernel */
1749 case RAIDFRAME_SEND_SPARET:
1750
1751 /* install the spare table */
1752 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1753
1754 /* respond to the requestor. the return status of the spare
1755 * table installation is passed in the "fcol" field */
1756 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1757 waitreq->fcol = retcode;
1758 rf_lock_mutex2(rf_sparet_wait_mutex);
1759 waitreq->next = rf_sparet_resp_queue;
1760 rf_sparet_resp_queue = waitreq;
1761 rf_broadcast_cond2(rf_sparet_resp_cv);
1762 rf_unlock_mutex2(rf_sparet_wait_mutex);
1763
1764 return (retcode);
1765 #endif
1766
1767 default:
1768 break; /* fall through to the os-specific code below */
1769
1770 }
1771
1772 if (!raidPtr->valid)
1773 return (EINVAL);
1774
1775 /*
1776 * Add support for "regular" device ioctls here.
1777 */
1778
1779 switch (cmd) {
1780 case DIOCGCACHE:
1781 retcode = rf_get_component_caches(raidPtr, (int *)data);
1782 break;
1783
1784 case DIOCCACHESYNC:
1785 retcode = rf_sync_component_caches(raidPtr);
1786 break;
1787
1788 default:
1789 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1790 break;
1791 }
1792
1793 return (retcode);
1794
1795 }
1796
1797
1798 /* raidinit -- complete the rest of the initialization for the
1799 RAIDframe device. */
1800
1801
1802 static void
1803 raidinit(struct raid_softc *rs)
1804 {
1805 cfdata_t cf;
1806 unsigned int unit;
1807 struct dk_softc *dksc = &rs->sc_dksc;
1808 RF_Raid_t *raidPtr = &rs->sc_r;
1809 device_t dev;
1810
1811 unit = raidPtr->raidid;
1812
1813 /* XXX doesn't check bounds. */
1814 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1815
1816 /* attach the pseudo device */
1817 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1818 cf->cf_name = raid_cd.cd_name;
1819 cf->cf_atname = raid_cd.cd_name;
1820 cf->cf_unit = unit;
1821 cf->cf_fstate = FSTATE_STAR;
1822
1823 dev = config_attach_pseudo(cf);
1824 if (dev == NULL) {
1825 printf("raid%d: config_attach_pseudo failed\n",
1826 raidPtr->raidid);
1827 free(cf, M_RAIDFRAME);
1828 return;
1829 }
1830
1831 /* provide a backpointer to the real softc */
1832 raidsoftc(dev) = rs;
1833
1834 /* disk_attach actually creates space for the CPU disklabel, among
1835 * other things, so it's critical to call this *BEFORE* we try putzing
1836 * with disklabels. */
1837 dk_init(dksc, dev, DKTYPE_RAID);
1838 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1839
1840 /* XXX There may be a weird interaction here between this, and
1841 * protectedSectors, as used in RAIDframe. */
1842
1843 rs->sc_size = raidPtr->totalSectors;
1844
1845 /* Attach dk and disk subsystems */
1846 dk_attach(dksc);
1847 disk_attach(&dksc->sc_dkdev);
1848 rf_set_geometry(rs, raidPtr);
1849
1850 bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
1851
1852 /* mark unit as usuable */
1853 rs->sc_flags |= RAIDF_INITED;
1854
1855 dkwedge_discover(&dksc->sc_dkdev);
1856 }
1857
1858 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1859 /* wake up the daemon & tell it to get us a spare table
1860 * XXX
1861 * the entries in the queues should be tagged with the raidPtr
1862 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1864 * XXX
1865 *
1866 * XXX This code is not currently used. GO
1867 */
1868 int
1869 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1870 {
1871 int retcode;
1872
1873 rf_lock_mutex2(rf_sparet_wait_mutex);
1874 req->next = rf_sparet_wait_queue;
1875 rf_sparet_wait_queue = req;
1876 rf_broadcast_cond2(rf_sparet_wait_cv);
1877
1878 /* mpsleep unlocks the mutex */
1879 while (!rf_sparet_resp_queue) {
1880 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1881 }
1882 req = rf_sparet_resp_queue;
1883 rf_sparet_resp_queue = req->next;
1884 rf_unlock_mutex2(rf_sparet_wait_mutex);
1885
1886 retcode = req->fcol;
1887 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1888 * alloc'd */
1889 return (retcode);
1890 }
1891 #endif
1892
1893 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1894 * bp & passes it down.
1895 * any calls originating in the kernel must use non-blocking I/O
1896 * do some extra sanity checking to return "appropriate" error values for
1897 * certain conditions (to make some standard utilities work)
1898 *
1899 * Formerly known as: rf_DoAccessKernel
1900 */
1901 void
1902 raidstart(RF_Raid_t *raidPtr)
1903 {
1904 struct raid_softc *rs;
1905 struct dk_softc *dksc;
1906
1907 rs = raidPtr->softc;
1908 dksc = &rs->sc_dksc;
1909 /* quick check to see if anything has died recently */
1910 rf_lock_mutex2(raidPtr->mutex);
1911 if (raidPtr->numNewFailures > 0) {
1912 rf_unlock_mutex2(raidPtr->mutex);
1913 rf_update_component_labels(raidPtr,
1914 RF_NORMAL_COMPONENT_UPDATE);
1915 rf_lock_mutex2(raidPtr->mutex);
1916 raidPtr->numNewFailures--;
1917 }
1918 rf_unlock_mutex2(raidPtr->mutex);
1919
1920 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1921 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1922 return;
1923 }
1924
1925 dk_start(dksc, NULL);
1926 }
1927
1928 static int
1929 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1930 {
1931 RF_SectorCount_t num_blocks, pb, sum;
1932 RF_RaidAddr_t raid_addr;
1933 daddr_t blocknum;
1934 int do_async;
1935 int rc;
1936
1937 rf_lock_mutex2(raidPtr->mutex);
1938 if (raidPtr->openings == 0) {
1939 rf_unlock_mutex2(raidPtr->mutex);
1940 return EAGAIN;
1941 }
1942 rf_unlock_mutex2(raidPtr->mutex);
1943
1944 blocknum = bp->b_rawblkno;
1945
1946 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1947 (int) blocknum));
1948
1949 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1950 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1951
1952 /* *THIS* is where we adjust what block we're going to...
1953 * but DO NOT TOUCH bp->b_blkno!!! */
1954 raid_addr = blocknum;
1955
1956 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1957 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1958 sum = raid_addr + num_blocks + pb;
1959 if (1 || rf_debugKernelAccess) {
1960 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1961 (int) raid_addr, (int) sum, (int) num_blocks,
1962 (int) pb, (int) bp->b_resid));
1963 }
1964 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1965 || (sum < num_blocks) || (sum < pb)) {
1966 rc = ENOSPC;
1967 goto done;
1968 }
1969 /*
1970 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1971 */
1972
1973 if (bp->b_bcount & raidPtr->sectorMask) {
1974 rc = ENOSPC;
1975 goto done;
1976 }
1977 db1_printf(("Calling DoAccess..\n"));
1978
1979
1980 rf_lock_mutex2(raidPtr->mutex);
1981 raidPtr->openings--;
1982 rf_unlock_mutex2(raidPtr->mutex);
1983
1984 /*
1985 * Everything is async.
1986 */
1987 do_async = 1;
1988
1989 /* don't ever condition on bp->b_flags & B_WRITE.
1990 * always condition on B_READ instead */
1991
1992 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1993 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1994 do_async, raid_addr, num_blocks,
1995 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1996
1997 done:
1998 return rc;
1999 }
2000
2001 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2002
/*
 * queue - per-component disk queue; its mutex is held on entry
 * req   - the request to issue (NOP, READ or WRITE)
 *
 * Always returns 0; I/O errors are delivered asynchronously through
 * KernelWakeupFunc() via req->error.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete immediately -- no real device I/O for a NOP */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf to target this component's device */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
/*
 * Completion callback attached (via b_iodone) to every buf issued by
 * rf_DispatchKernelIO().  Records the I/O's error status, marks the
 * component failed on error (when the set can survive it), and moves
 * the request to the raidPtr's iodone queue for the raidio thread.
 * Called from biodone() context.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			 * triggers a component label update */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2145
2146
2147 /*
2148 * initialize a buf structure for doing an I/O in the kernel.
2149 */
2150 static void
2151 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2152 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2153 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2154 struct proc *b_proc)
2155 {
2156 /* bp->b_flags = B_PHYS | rw_flag; */
2157 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2158 bp->b_oflags = 0;
2159 bp->b_cflags = 0;
2160 bp->b_bcount = numSect << logBytesPerSector;
2161 bp->b_bufsize = bp->b_bcount;
2162 bp->b_error = 0;
2163 bp->b_dev = dev;
2164 bp->b_data = bf;
2165 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2166 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2167 if (bp->b_bcount == 0) {
2168 panic("bp->b_bcount is zero in InitBP!!");
2169 }
2170 bp->b_proc = b_proc;
2171 bp->b_iodone = cbFunc;
2172 bp->b_private = cbArg;
2173 }
2174
2175 /*
2176 * Wait interruptibly for an exclusive lock.
2177 *
2178 * XXX
2179 * Several drivers do this; it should be abstracted and made MP-safe.
2180 * (Hmm... where have we seen this warning before :-> GO )
2181 */
2182 static int
2183 raidlock(struct raid_softc *rs)
2184 {
2185 int error;
2186
2187 error = 0;
2188 mutex_enter(&rs->sc_mutex);
2189 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2190 rs->sc_flags |= RAIDF_WANTED;
2191 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2192 if (error != 0)
2193 goto done;
2194 }
2195 rs->sc_flags |= RAIDF_LOCKED;
2196 done:
2197 mutex_exit(&rs->sc_mutex);
2198 return (error);
2199 }
2200 /*
2201 * Unlock and wake up any waiters.
2202 */
2203 static void
2204 raidunlock(struct raid_softc *rs)
2205 {
2206
2207 mutex_enter(&rs->sc_mutex);
2208 rs->sc_flags &= ~RAIDF_LOCKED;
2209 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2210 rs->sc_flags &= ~RAIDF_WANTED;
2211 cv_broadcast(&rs->sc_cv);
2212 }
2213 mutex_exit(&rs->sc_mutex);
2214 }
2215
2216
2217 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2218 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2219 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2220
/*
 * Byte offset, from the start of a component, of the reserved
 * component-info (label) area.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2227
2228 static daddr_t
2229 rf_component_info_size(unsigned secsize)
2230 {
2231 daddr_t info_size;
2232
2233 KASSERT(secsize);
2234 if (secsize > RF_COMPONENT_INFO_SIZE)
2235 info_size = secsize;
2236 else
2237 info_size = RF_COMPONENT_INFO_SIZE;
2238
2239 return info_size;
2240 }
2241
2242 static daddr_t
2243 rf_parity_map_offset(RF_Raid_t *raidPtr)
2244 {
2245 daddr_t map_offset;
2246
2247 KASSERT(raidPtr->bytesPerSector);
2248 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2249 map_offset = raidPtr->bytesPerSector;
2250 else
2251 map_offset = RF_COMPONENT_INFO_SIZE;
2252 map_offset += rf_component_info_offset();
2253
2254 return map_offset;
2255 }
2256
2257 static daddr_t
2258 rf_parity_map_size(RF_Raid_t *raidPtr)
2259 {
2260 daddr_t map_size;
2261
2262 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2263 map_size = raidPtr->bytesPerSector;
2264 else
2265 map_size = RF_PARITY_MAP_SIZE;
2266
2267 return map_size;
2268 }
2269
2270 int
2271 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2272 {
2273 RF_ComponentLabel_t *clabel;
2274
2275 clabel = raidget_component_label(raidPtr, col);
2276 clabel->clean = RF_RAID_CLEAN;
2277 raidflush_component_label(raidPtr, col);
2278 return(0);
2279 }
2280
2281
2282 int
2283 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2284 {
2285 RF_ComponentLabel_t *clabel;
2286
2287 clabel = raidget_component_label(raidPtr, col);
2288 clabel->clean = RF_RAID_DIRTY;
2289 raidflush_component_label(raidPtr, col);
2290 return(0);
2291 }
2292
/*
 * Read the on-disk component label of column `col' into the in-core
 * copy kept in raid_cinfo.  Returns the error from the underlying
 * read, 0 on success.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2302
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers may modify it and then push it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2308
/*
 * Write the in-core component label of column `col' back to disk,
 * first stamping it with the array's current modification counter.
 * Returns the error from the underlying write, 0 on success.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's mod count in step with the label's */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2323
2324
/*
 * Read a component label from the reserved component-info area of
 * `dev'.  `secsize' is the component's sector size, used to locate
 * and size that area.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2334
/* ARGSUSED */
/*
 * Read `dsize' bytes at byte offset `offset' of `dev' and copy the
 * first `msize' bytes of the result into `data'.  Used for both the
 * component label and the parity map.  Returns 0 on success or an
 * errno value.
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* issue the read and wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	/* only copy out the payload if the read succeeded */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2372
2373
/*
 * Write a component label into the reserved component-info area of
 * `dev'.  `secsize' is the component's sector size, used to locate
 * and size that area.  The write is synchronous.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2383
/* ARGSUSED */
/*
 * Write `msize' bytes from `data' into a `dsize'-byte area at byte
 * offset `offset' of `dev'; the rest of the area is zero-filled.
 * If `asyncp' is nonzero the write is issued B_ASYNC and we return 0
 * immediately without waiting for (or reporting) completion.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* zero the whole area, then drop in the (possibly smaller) payload */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	/* async case: the buffer is cleaned up on I/O completion, not here */
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2418
2419 void
2420 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2421 {
2422 int c;
2423
2424 for (c = 0; c < raidPtr->numCol; c++) {
2425 /* Skip dead disks. */
2426 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2427 continue;
2428 /* XXXjld: what if an error occurs here? */
2429 raidwrite_component_area(raidPtr->Disks[c].dev,
2430 raidPtr->raid_cinfo[c].ci_vp, map,
2431 RF_PARITYMAP_NBYTE,
2432 rf_parity_map_offset(raidPtr),
2433 rf_parity_map_size(raidPtr), 0);
2434 }
2435 }
2436
2437 void
2438 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2439 {
2440 struct rf_paritymap_ondisk tmp;
2441 int c,first;
2442
2443 first=1;
2444 for (c = 0; c < raidPtr->numCol; c++) {
2445 /* Skip dead disks. */
2446 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2447 continue;
2448 raidread_component_area(raidPtr->Disks[c].dev,
2449 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2450 RF_PARITYMAP_NBYTE,
2451 rf_parity_map_offset(raidPtr),
2452 rf_parity_map_size(raidPtr));
2453 if (first) {
2454 memcpy(map, &tmp, sizeof(*map));
2455 first = 0;
2456 } else {
2457 rf_paritymap_merge(map, &tmp);
2458 }
2459 }
2460 }
2461
/*
 * Bump the array's modification counter and mark the component label
 * of every live component -- and of every in-use spare -- dirty on
 * disk.  Spare labels are reinitialized to reflect the column they
 * are standing in for.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2521
2522
/*
 * Push fresh component labels (with a bumped mod_counter) out to all
 * optimal components and all in-use spares.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2600
2601 void
2602 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2603 {
2604
2605 if (vp != NULL) {
2606 if (auto_configured == 1) {
2607 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2608 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2609 vput(vp);
2610
2611 } else {
2612 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2613 }
2614 }
2615 }
2616
2617
2618 void
2619 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2620 {
2621 int r,c;
2622 struct vnode *vp;
2623 int acd;
2624
2625
2626 /* We take this opportunity to close the vnodes like we should.. */
2627
2628 for (c = 0; c < raidPtr->numCol; c++) {
2629 vp = raidPtr->raid_cinfo[c].ci_vp;
2630 acd = raidPtr->Disks[c].auto_configured;
2631 rf_close_component(raidPtr, vp, acd);
2632 raidPtr->raid_cinfo[c].ci_vp = NULL;
2633 raidPtr->Disks[c].auto_configured = 0;
2634 }
2635
2636 for (r = 0; r < raidPtr->numSpare; r++) {
2637 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2638 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2639 rf_close_component(raidPtr, vp, acd);
2640 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2641 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2642 }
2643 }
2644
2645
/*
 * Kernel thread body: fail component `req->col', optionally kicking
 * off a reconstruction (when RF_FDFLAGS_RECON is set in req->flags).
 * Frees `req' and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* second argument selects whether reconstruction is started */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2667
/*
 * Kernel thread body: rewrite all parity on the array.  On success
 * the array is flagged parity-clean.  Anyone blocked on
 * parity_rewrite_cv (e.g. a pending shutdown) is woken when the
 * rewrite finishes.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2700
2701
/*
 * Kernel thread body: run rf_CopybackReconstructedData() with the
 * copyback_in_progress flag set, then exit the thread.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2716
2717
/*
 * Kernel thread body: run rf_ReconstructInPlace() on column
 * `req->col' with recon_in_progress set.  Frees `req' and exits the
 * thread when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2735
/*
 * Try to read a component label from (dev, vp).  If one is present
 * and looks reasonable, allocate an RF_AutoConfig_t for it and
 * prepend it to ac_list; otherwise the vnode is closed and released.
 * Returns the (possibly updated) list head, or NULL on memory
 * exhaustion -- in which case the entire list has been freed.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				/* label is freed here; oomem frees the list */
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and release the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2793
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels and return a list of RF_AutoConfig_t entries for all
 * plausible components found.  Wedges ("dk") are scanned in a first
 * pass, other disks in a second, so that a wedge covering a whole
 * disk wins over the disk's raw partition.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares." Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only wedges of RAIDframe partition type are
				   of interest */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more. We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2997
2998
2999 int
3000 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3001 {
3002
3003 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3004 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3005 ((clabel->clean == RF_RAID_CLEAN) ||
3006 (clabel->clean == RF_RAID_DIRTY)) &&
3007 clabel->row >=0 &&
3008 clabel->column >= 0 &&
3009 clabel->num_rows > 0 &&
3010 clabel->num_columns > 0 &&
3011 clabel->row < clabel->num_rows &&
3012 clabel->column < clabel->num_columns &&
3013 clabel->blockSize > 0 &&
3014 /*
3015 * numBlocksHi may contain garbage, but it is ok since
3016 * the type is unsigned. If it is really garbage,
3017 * rf_fix_old_label_size() will fix it.
3018 */
3019 rf_component_label_numblocks(clabel) > 0) {
3020 /*
3021 * label looks reasonable enough...
3022 * let's make sure it has no old garbage.
3023 */
3024 if (numsecs)
3025 rf_fix_old_label_size(clabel, numsecs);
3026 return(1);
3027 }
3028 return(0);
3029 }
3030
3031
3032 /*
3033 * For reasons yet unknown, some old component labels have garbage in
3034 * the newer numBlocksHi region, and this causes lossage. Since those
3035 * disks will also have numsecs set to less than 32 bits of sectors,
3036 * we can determine when this corruption has occurred, and fix it.
3037 *
3038 * The exact same problem, with the same unknown reason, happens to
3039 * the partitionSizeHi member as well.
3040 */
3041 static void
3042 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3043 {
3044
3045 if (numsecs < ((uint64_t)1 << 32)) {
3046 if (clabel->numBlocksHi) {
3047 printf("WARNING: total sectors < 32 bits, yet "
3048 "numBlocksHi set\n"
3049 "WARNING: resetting numBlocksHi to zero.\n");
3050 clabel->numBlocksHi = 0;
3051 }
3052
3053 if (clabel->partitionSizeHi) {
3054 printf("WARNING: total sectors < 32 bits, yet "
3055 "partitionSizeHi set\n"
3056 "WARNING: resetting partitionSizeHi to zero.\n");
3057 clabel->partitionSizeHi = 0;
3058 }
3059 }
3060 }
3061
3062
3063 #ifdef DEBUG
/*
 * Dump the contents of a component label to the console (DEBUG only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* index by root_partition & 3; see the printf below */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
3095 #endif
3096
3097 RF_ConfigSet_t *
3098 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3099 {
3100 RF_AutoConfig_t *ac;
3101 RF_ConfigSet_t *config_sets;
3102 RF_ConfigSet_t *cset;
3103 RF_AutoConfig_t *ac_next;
3104
3105
3106 config_sets = NULL;
3107
3108 /* Go through the AutoConfig list, and figure out which components
3109 belong to what sets. */
3110 ac = ac_list;
3111 while(ac!=NULL) {
3112 /* we're going to putz with ac->next, so save it here
3113 for use at the end of the loop */
3114 ac_next = ac->next;
3115
3116 if (config_sets == NULL) {
3117 /* will need at least this one... */
3118 config_sets = (RF_ConfigSet_t *)
3119 malloc(sizeof(RF_ConfigSet_t),
3120 M_RAIDFRAME, M_NOWAIT);
3121 if (config_sets == NULL) {
3122 panic("rf_create_auto_sets: No memory!");
3123 }
3124 /* this one is easy :) */
3125 config_sets->ac = ac;
3126 config_sets->next = NULL;
3127 config_sets->rootable = 0;
3128 ac->next = NULL;
3129 } else {
3130 /* which set does this component fit into? */
3131 cset = config_sets;
3132 while(cset!=NULL) {
3133 if (rf_does_it_fit(cset, ac)) {
3134 /* looks like it matches... */
3135 ac->next = cset->ac;
3136 cset->ac = ac;
3137 break;
3138 }
3139 cset = cset->next;
3140 }
3141 if (cset==NULL) {
3142 /* didn't find a match above... new set..*/
3143 cset = (RF_ConfigSet_t *)
3144 malloc(sizeof(RF_ConfigSet_t),
3145 M_RAIDFRAME, M_NOWAIT);
3146 if (cset == NULL) {
3147 panic("rf_create_auto_sets: No memory!");
3148 }
3149 cset->ac = ac;
3150 ac->next = NULL;
3151 cset->next = config_sets;
3152 cset->rootable = 0;
3153 config_sets = cset;
3154 }
3155 }
3156 ac = ac_next;
3157 }
3158
3159
3160 return(config_sets);
3161 }
3162
3163 static int
3164 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3165 {
3166 RF_ComponentLabel_t *clabel1, *clabel2;
3167
3168 /* If this one matches the *first* one in the set, that's good
3169 enough, since the other members of the set would have been
3170 through here too... */
3171 /* note that we are not checking partitionSize here..
3172
3173 Note that we are also not checking the mod_counters here.
3174 If everything else matches except the mod_counter, that's
3175 good enough for this test. We will deal with the mod_counters
3176 a little later in the autoconfiguration process.
3177
3178 (clabel1->mod_counter == clabel2->mod_counter) &&
3179
3180 The reason we don't check for this is that failed disks
3181 will have lower modification counts. If those disks are
3182 not added to the set they used to belong to, then they will
3183 form their own set, which may result in 2 different sets,
3184 for example, competing to be configured at raid0, and
3185 perhaps competing to be the root filesystem set. If the
3186 wrong ones get configured, or both attempt to become /,
3187 weird behaviour and or serious lossage will occur. Thus we
3188 need to bring them into the fold here, and kick them out at
3189 a later point.
3190
3191 */
3192
3193 clabel1 = cset->ac->clabel;
3194 clabel2 = ac->clabel;
3195 if ((clabel1->version == clabel2->version) &&
3196 (clabel1->serial_number == clabel2->serial_number) &&
3197 (clabel1->num_rows == clabel2->num_rows) &&
3198 (clabel1->num_columns == clabel2->num_columns) &&
3199 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3200 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3201 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3202 (clabel1->parityConfig == clabel2->parityConfig) &&
3203 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3204 (clabel1->blockSize == clabel2->blockSize) &&
3205 rf_component_label_numblocks(clabel1) ==
3206 rf_component_label_numblocks(clabel2) &&
3207 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3208 (clabel1->root_partition == clabel2->root_partition) &&
3209 (clabel1->last_unit == clabel2->last_unit) &&
3210 (clabel1->config_order == clabel2->config_order)) {
3211 /* if it get's here, it almost *has* to be a match */
3212 } else {
3213 /* it's not consistent with somebody in the set..
3214 punt */
3215 return(0);
3216 }
3217 /* all was fine.. it must fit... */
3218 return(1);
3219 }
3220
/*
 * Decide whether a configuration set has enough live components (at
 * the set's newest mod_counter) to be brought up.  RAID 1 gets
 * special pair-wise treatment: either member of a mirror pair may be
 * missing, but not both.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the authoritative mod_counter is the largest one in the set */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a component at column c with the newest counter */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				    ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; 4 and 5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3323
3324 void
3325 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3326 RF_Raid_t *raidPtr)
3327 {
3328 RF_ComponentLabel_t *clabel;
3329 int i;
3330
3331 clabel = ac->clabel;
3332
3333 /* 1. Fill in the common stuff */
3334 config->numCol = clabel->num_columns;
3335 config->numSpare = 0; /* XXX should this be set here? */
3336 config->sectPerSU = clabel->sectPerSU;
3337 config->SUsPerPU = clabel->SUsPerPU;
3338 config->SUsPerRU = clabel->SUsPerRU;
3339 config->parityConfig = clabel->parityConfig;
3340 /* XXX... */
3341 strcpy(config->diskQueueType,"fifo");
3342 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3343 config->layoutSpecificSize = 0; /* XXX ?? */
3344
3345 while(ac!=NULL) {
3346 /* row/col values will be in range due to the checks
3347 in reasonable_label() */
3348 strcpy(config->devnames[0][ac->clabel->column],
3349 ac->devname);
3350 ac = ac->next;
3351 }
3352
3353 for(i=0;i<RF_MAXDBGV;i++) {
3354 config->debugVars[i][0] = 0;
3355 }
3356 }
3357
3358 int
3359 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3360 {
3361 RF_ComponentLabel_t *clabel;
3362 int column;
3363 int sparecol;
3364
3365 raidPtr->autoconfigure = new_value;
3366
3367 for(column=0; column<raidPtr->numCol; column++) {
3368 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3369 clabel = raidget_component_label(raidPtr, column);
3370 clabel->autoconfigure = new_value;
3371 raidflush_component_label(raidPtr, column);
3372 }
3373 }
3374 for(column = 0; column < raidPtr->numSpare ; column++) {
3375 sparecol = raidPtr->numCol + column;
3376 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3377 clabel = raidget_component_label(raidPtr, sparecol);
3378 clabel->autoconfigure = new_value;
3379 raidflush_component_label(raidPtr, sparecol);
3380 }
3381 }
3382 return(new_value);
3383 }
3384
3385 int
3386 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3387 {
3388 RF_ComponentLabel_t *clabel;
3389 int column;
3390 int sparecol;
3391
3392 raidPtr->root_partition = new_value;
3393 for(column=0; column<raidPtr->numCol; column++) {
3394 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3395 clabel = raidget_component_label(raidPtr, column);
3396 clabel->root_partition = new_value;
3397 raidflush_component_label(raidPtr, column);
3398 }
3399 }
3400 for(column = 0; column < raidPtr->numSpare ; column++) {
3401 sparecol = raidPtr->numCol + column;
3402 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3403 clabel = raidget_component_label(raidPtr, sparecol);
3404 clabel->root_partition = new_value;
3405 raidflush_component_label(raidPtr, sparecol);
3406 }
3407 }
3408 return(new_value);
3409 }
3410
3411 void
3412 rf_release_all_vps(RF_ConfigSet_t *cset)
3413 {
3414 RF_AutoConfig_t *ac;
3415
3416 ac = cset->ac;
3417 while(ac!=NULL) {
3418 /* Close the vp, and give it back */
3419 if (ac->vp) {
3420 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3421 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3422 vput(ac->vp);
3423 ac->vp = NULL;
3424 }
3425 ac = ac->next;
3426 }
3427 }
3428
3429
3430 void
3431 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3432 {
3433 RF_AutoConfig_t *ac;
3434 RF_AutoConfig_t *next_ac;
3435
3436 ac = cset->ac;
3437 while(ac!=NULL) {
3438 next_ac = ac->next;
3439 /* nuke the label */
3440 free(ac->clabel, M_RAIDFRAME);
3441 /* cleanup the config structure */
3442 free(ac, M_RAIDFRAME);
3443 /* "next.." */
3444 ac = next_ac;
3445 }
3446 /* and, finally, nuke the config set */
3447 free(cset, M_RAIDFRAME);
3448 }
3449
3450
3451 void
3452 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3453 {
3454 /* current version number */
3455 clabel->version = RF_COMPONENT_LABEL_VERSION;
3456 clabel->serial_number = raidPtr->serial_number;
3457 clabel->mod_counter = raidPtr->mod_counter;
3458
3459 clabel->num_rows = 1;
3460 clabel->num_columns = raidPtr->numCol;
3461 clabel->clean = RF_RAID_DIRTY; /* not clean */
3462 clabel->status = rf_ds_optimal; /* "It's good!" */
3463
3464 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3465 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3466 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3467
3468 clabel->blockSize = raidPtr->bytesPerSector;
3469 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3470
3471 /* XXX not portable */
3472 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3473 clabel->maxOutstanding = raidPtr->maxOutstanding;
3474 clabel->autoconfigure = raidPtr->autoconfigure;
3475 clabel->root_partition = raidPtr->root_partition;
3476 clabel->last_unit = raidPtr->raidid;
3477 clabel->config_order = raidPtr->config_order;
3478
3479 #ifndef RF_NO_PARITY_MAP
3480 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3481 #endif
3482 }
3483
/*
 * Configure one auto-detected RAID set.  Builds an RF_Config_t from the
 * component labels, finds (or creates) a free raid unit, and runs the
 * normal configuration path on it.  Returns the configured softc, or
 * NULL on allocation or configuration failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/*
	 * Start at the unit recorded in the label and walk upward until
	 * we hit a unit that is either not yet created (raidget(..., false)
	 * returns NULL) or exists but is not currently valid.
	 */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing softc at that unit -- create one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: give the softc back. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3567
3568 void
3569 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3570 size_t xmin, size_t xmax)
3571 {
3572 int error;
3573
3574 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3575 pool_sethiwat(p, xmax);
3576 if ((error = pool_prime(p, xmin)) != 0)
3577 panic("%s: failed to prime pool: %d", __func__, error);
3578 pool_setlowat(p, xmin);
3579 }
3580
3581 /*
3582 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3583 * to see if there is IO pending and if that IO could possibly be done
3584 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3585 * otherwise.
3586 *
3587 */
3588 int
3589 rf_buf_queue_check(RF_Raid_t *raidPtr)
3590 {
3591 struct raid_softc *rs;
3592 struct dk_softc *dksc;
3593
3594 rs = raidPtr->softc;
3595 dksc = &rs->sc_dksc;
3596
3597 if ((rs->sc_flags & RAIDF_INITED) == 0)
3598 return 1;
3599
3600 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3601 /* there is work to do */
3602 return 0;
3603 }
3604 /* default is nothing to do */
3605 return 1;
3606 }
3607
3608 int
3609 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3610 {
3611 uint64_t numsecs;
3612 unsigned secsize;
3613 int error;
3614
3615 error = getdisksize(vp, &numsecs, &secsize);
3616 if (error == 0) {
3617 diskPtr->blockSize = secsize;
3618 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3619 diskPtr->partitionSize = numsecs;
3620 return 0;
3621 }
3622 return error;
3623 }
3624
/*
 * Autoconfiguration match routine: a raid pseudo-device instance
 * always matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3630
/*
 * Autoconfiguration attach routine.  Intentionally empty: per-set
 * initialization is handled elsewhere (see raidinit() and the ioctl
 * configuration path), not at device attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3635
3636
3637 static int
3638 raid_detach(device_t self, int flags)
3639 {
3640 int error;
3641 struct raid_softc *rs = raidsoftc(self);
3642
3643 if (rs == NULL)
3644 return ENXIO;
3645
3646 if ((error = raidlock(rs)) != 0)
3647 return (error);
3648
3649 error = raid_detach_unlocked(rs);
3650
3651 raidunlock(rs);
3652
3653 /* XXX raid can be referenced here */
3654
3655 if (error)
3656 return error;
3657
3658 /* Free the softc */
3659 raidput(rs);
3660
3661 return 0;
3662 }
3663
/*
 * Publish disk geometry for the RAID set to the disk(9) layer.  Only
 * the fields assigned below are meaningful; everything else in the
 * geometry structure is left zero by the memset.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* Synthetic track count (4 per column); presumably arbitrary,
	   since no physical geometry backs a RAID set. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3679
3680 /*
3681 * Get cache info for all the components (including spares).
3682 * Returns intersection of all the cache flags of all disks, or first
3683 * error if any encountered.
3684 * XXXfua feature flags can change as spares are added - lock down somehow
3685 */
3686 static int
3687 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3688 {
3689 int c;
3690 int error;
3691 int dkwhole = 0, dkpart;
3692
3693 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3694 /*
3695 * Check any non-dead disk, even when currently being
3696 * reconstructed.
3697 */
3698 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3699 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3700 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3701 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3702 if (error) {
3703 if (error != ENODEV) {
3704 printf("raid%d: get cache for component %s failed\n",
3705 raidPtr->raidid,
3706 raidPtr->Disks[c].devname);
3707 }
3708
3709 return error;
3710 }
3711
3712 if (c == 0)
3713 dkwhole = dkpart;
3714 else
3715 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3716 }
3717 }
3718
3719 *data = dkwhole;
3720
3721 return 0;
3722 }
3723
3724 /*
3725 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3726 * We end up returning whatever error was returned by the first cache flush
3727 * that fails.
3728 */
3729
3730 int
3731 rf_sync_component_caches(RF_Raid_t *raidPtr)
3732 {
3733 int c, sparecol;
3734 int e,error;
3735 int force = 1;
3736
3737 error = 0;
3738 for (c = 0; c < raidPtr->numCol; c++) {
3739 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3740 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3741 &force, FWRITE, NOCRED);
3742 if (e) {
3743 if (e != ENODEV)
3744 printf("raid%d: cache flush to component %s failed.\n",
3745 raidPtr->raidid, raidPtr->Disks[c].devname);
3746 if (error == 0) {
3747 error = e;
3748 }
3749 }
3750 }
3751 }
3752
3753 for( c = 0; c < raidPtr->numSpare ; c++) {
3754 sparecol = raidPtr->numCol + c;
3755 /* Need to ensure that the reconstruct actually completed! */
3756 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3757 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3758 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3759 if (e) {
3760 if (e != ENODEV)
3761 printf("raid%d: cache flush to component %s failed.\n",
3762 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3763 if (error == 0) {
3764 error = e;
3765 }
3766 }
3767 }
3768 }
3769 return error;
3770 }
3771
3772 /* Fill in info with the current status */
3773 void
3774 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3775 {
3776
3777 if (raidPtr->status != rf_rs_reconstructing) {
3778 info->total = 100;
3779 info->completed = 100;
3780 } else {
3781 info->total = raidPtr->reconControl->numRUsTotal;
3782 info->completed = raidPtr->reconControl->numRUsComplete;
3783 }
3784 info->remaining = info->total - info->completed;
3785 }
3786
3787 /* Fill in info with the current status */
3788 void
3789 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3790 {
3791
3792 if (raidPtr->parity_rewrite_in_progress == 1) {
3793 info->total = raidPtr->Layout.numStripe;
3794 info->completed = raidPtr->parity_rewrite_stripes_done;
3795 } else {
3796 info->completed = 100;
3797 info->total = 100;
3798 }
3799 info->remaining = info->total - info->completed;
3800 }
3801
3802 /* Fill in info with the current status */
3803 void
3804 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3805 {
3806
3807 if (raidPtr->copyback_in_progress == 1) {
3808 info->total = raidPtr->Layout.numStripe;
3809 info->completed = raidPtr->copyback_stripes_done;
3810 info->remaining = info->total - info->completed;
3811 } else {
3812 info->remaining = 0;
3813 info->completed = 100;
3814 info->total = 100;
3815 }
3816 }
3817
3818 /* Fill in config with the current info */
3819 int
3820 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3821 {
3822 int d, i, j;
3823
3824 if (!raidPtr->valid)
3825 return (ENODEV);
3826 config->cols = raidPtr->numCol;
3827 config->ndevs = raidPtr->numCol;
3828 if (config->ndevs >= RF_MAX_DISKS)
3829 return (ENOMEM);
3830 config->nspares = raidPtr->numSpare;
3831 if (config->nspares >= RF_MAX_DISKS)
3832 return (ENOMEM);
3833 config->maxqdepth = raidPtr->maxQueueDepth;
3834 d = 0;
3835 for (j = 0; j < config->cols; j++) {
3836 config->devs[d] = raidPtr->Disks[j];
3837 d++;
3838 }
3839 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3840 config->spares[i] = raidPtr->Disks[j];
3841 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3842 /* XXX: raidctl(8) expects to see this as a used spare */
3843 config->spares[i].status = rf_ds_used_spare;
3844 }
3845 }
3846 return 0;
3847 }
3848
3849 int
3850 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3851 {
3852 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3853 RF_ComponentLabel_t *raid_clabel;
3854 int column = clabel->column;
3855
3856 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3857 return EINVAL;
3858 raid_clabel = raidget_component_label(raidPtr, column);
3859 memcpy(clabel, raid_clabel, sizeof *clabel);
3860
3861 return 0;
3862 }
3863
3864 /*
3865 * Module interface
3866 */
3867
3868 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3869
3870 #ifdef _MODULE
3871 CFDRIVER_DECL(raid, DV_DISK, NULL);
3872 #endif
3873
3874 static int raid_modcmd(modcmd_t, void *);
3875 static int raid_modcmd_init(void);
3876 static int raid_modcmd_fini(void);
3877
3878 static int
3879 raid_modcmd(modcmd_t cmd, void *data)
3880 {
3881 int error;
3882
3883 error = 0;
3884 switch (cmd) {
3885 case MODULE_CMD_INIT:
3886 error = raid_modcmd_init();
3887 break;
3888 case MODULE_CMD_FINI:
3889 error = raid_modcmd_fini();
3890 break;
3891 default:
3892 error = ENOTTY;
3893 break;
3894 }
3895 return error;
3896 }
3897
/*
 * Initialize the raid module: attach the device switch and autoconf
 * glue, boot the RAIDframe core, and register a finalizer to run
 * RAID auto-configuration once all hardware devices have been found.
 * Each failure path unwinds the attaches performed before it.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attach above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back both earlier attaches. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here: all failure paths returned above. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: autoconfiguration simply will not happen. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3968
/*
 * Tear down the raid module.  Refuses with EBUSY while any raid softc
 * still exists.  If a later detach step fails, the earlier detaches
 * are re-attached so the module is left in a usable state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Undo the cfattach detach above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Undo both earlier detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core and its global state. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4018