rf_netbsdkintf.c revision 1.366 1 /* $NetBSD: rf_netbsdkintf.c,v 1.366 2019/02/05 17:13:37 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.366 2019/02/05 17:13:37 christos Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "rf_compat50.h"
153 #include "rf_compat80.h"
154
155 #ifdef COMPAT_NETBSD32
156 #ifdef _LP64
157 #include "rf_compat32.h"
158 #define RAID_COMPAT32
159 #endif
160 #endif
161
162 #include "ioconf.h"
163
164 #ifdef DEBUG
165 int rf_kdebug_level = 0;
166 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
167 #else /* DEBUG */
168 #define db1_printf(a) { }
169 #endif /* DEBUG */
170
171 #ifdef DEBUG_ROOT
172 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
173 #else
174 #define DPRINTF(a, ...)
175 #endif
176
177 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
178 static rf_declare_mutex2(rf_sparet_wait_mutex);
179 static rf_declare_cond2(rf_sparet_wait_cv);
180 static rf_declare_cond2(rf_sparet_resp_cv);
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186 #endif
187
188 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
189
190 /* prototypes */
191 static void KernelWakeupFunc(struct buf *);
192 static void InitBP(struct buf *, struct vnode *, unsigned,
193 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
194 void *, int, struct proc *);
195 struct raid_softc;
196 static void raidinit(struct raid_softc *);
197 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
198 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
199
200 static int raid_match(device_t, cfdata_t, void *);
201 static void raid_attach(device_t, device_t, void *);
202 static int raid_detach(device_t, int);
203
204 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
205 daddr_t, daddr_t);
206 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
207 daddr_t, daddr_t, int);
208
209 static int raidwrite_component_label(unsigned,
210 dev_t, struct vnode *, RF_ComponentLabel_t *);
211 static int raidread_component_label(unsigned,
212 dev_t, struct vnode *, RF_ComponentLabel_t *);
213
214 static int raid_diskstart(device_t, struct buf *bp);
215 static int raid_dumpblocks(device_t, void *, daddr_t, int);
216 static int raid_lastclose(device_t);
217
218 static dev_type_open(raidopen);
219 static dev_type_close(raidclose);
220 static dev_type_read(raidread);
221 static dev_type_write(raidwrite);
222 static dev_type_ioctl(raidioctl);
223 static dev_type_strategy(raidstrategy);
224 static dev_type_dump(raiddump);
225 static dev_type_size(raidsize);
226
/* Block-device switch for /dev/raid* (disk-class device). */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
237
/* Character-device (raw) switch for /dev/rraid*. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
252
/* Hooks handed to the generic dk(4) framework for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
262
/*
 * Per-unit software state for a RAIDframe device.  Allocated by
 * raidcreate() and linked on the global `raids' list (raid_lock).
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk state (must be first) */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags (RAIDF_*, below) */
	int     sc_cflags;		/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on global unit list */
};
/* sc_flags */
#define	RAIDF_INITED	0x01	/* unit has been initialized */
#define	RAIDF_SHUTDOWN	0x02	/* unit is being shutdown */
#define	RAIDF_DETACH	0x04	/* detach after final close */
#define	RAIDF_WANTED	0x08	/* someone waiting to obtain a lock */
#define	RAIDF_LOCKED	0x10	/* unit is locked */
#define	RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */

/* Map a dev_t to its unit / softc. */
#define	raidunit(x)	DISKUNIT(x)
#define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
285
286 extern struct cfdriver raid_cd;
287 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
288 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
289 DVF_DETACH_SHUTDOWN);
290
291 /* Internal representation of a rf_recon_req */
/*
 * Internal representation of a rf_recon_req: kernel-side copy of a
 * reconstruction request, carrying the target column, the request
 * flags and the array it applies to.
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column to act on */
	RF_ReconReqFlags_t flags;	/* reconstruction flags */
	void *raidPtr;			/* RF_Raid_t * of the target array */
};
297
298 /*
299 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
300 * Be aware that large numbers can allow the driver to consume a lot of
301 * kernel memory, especially on writes, and in degraded mode reads.
302 *
303 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
304 * a single 64K write will typically require 64K for the old data,
305 * 64K for the old parity, and 64K for the new parity, for a total
306 * of 192K (if the parity buffer is not re-used immediately).
307 * Even it if is used immediately, that's still 128K, which when multiplied
308 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
309 *
310 * Now in degraded mode, for example, a 64K read on the above setup may
311 * require data reconstruction, which will require *all* of the 4 remaining
312 * disks to participate -- 4 * 32K/disk == 128K again.
313 */
314
315 #ifndef RAIDOUTSTANDING
316 #define RAIDOUTSTANDING 6
317 #endif
318
319 #define RAIDLABELDEV(dev) \
320 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
321
322 /* declared here, and made public, for the benefit of KVM stuff.. */
323
324 static int raidlock(struct raid_softc *);
325 static void raidunlock(struct raid_softc *);
326
327 static int raid_detach_unlocked(struct raid_softc *);
328
329 static void rf_markalldirty(RF_Raid_t *);
330 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
331
332 void rf_ReconThread(struct rf_recon_req_internal *);
333 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
334 void rf_CopybackThread(RF_Raid_t *raidPtr);
335 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
336 int rf_autoconfig(device_t);
337 void rf_buildroothack(RF_ConfigSet_t *);
338
339 RF_AutoConfig_t *rf_find_raid_components(void);
340 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
341 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
342 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
343 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
344 int rf_set_autoconfig(RF_Raid_t *, int);
345 int rf_set_rootpartition(RF_Raid_t *, int);
346 void rf_release_all_vps(RF_ConfigSet_t *);
347 void rf_cleanup_config_set(RF_ConfigSet_t *);
348 int rf_have_enough_components(RF_ConfigSet_t *);
349 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
350 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
351
352 /*
353 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
354 * Note that this is overridden by having RAID_AUTOCONFIG as an option
355 * in the kernel config file.
356 */
357 #ifdef RAID_AUTOCONFIG
358 int raidautoconfig = 1;
359 #else
360 int raidautoconfig = 0;
361 #endif
362 static bool raidautoconfigdone = false;
363
364 struct RF_Pools_s rf_pools;
365
366 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
367 static kmutex_t raid_lock;
368
369 static struct raid_softc *
370 raidcreate(int unit) {
371 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
372 sc->sc_unit = unit;
373 cv_init(&sc->sc_cv, "raidunit");
374 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
375 return sc;
376 }
377
/*
 * Tear down a softc created by raidcreate(): release the condvar and
 * mutex, then free the structure.  Must not still be on the global list.
 */
static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}
384
385 static struct raid_softc *
386 raidget(int unit, bool create) {
387 struct raid_softc *sc;
388 if (unit < 0) {
389 #ifdef DIAGNOSTIC
390 panic("%s: unit %d!", __func__, unit);
391 #endif
392 return NULL;
393 }
394 mutex_enter(&raid_lock);
395 LIST_FOREACH(sc, &raids, sc_link) {
396 if (sc->sc_unit == unit) {
397 mutex_exit(&raid_lock);
398 return sc;
399 }
400 }
401 mutex_exit(&raid_lock);
402 if (!create)
403 return NULL;
404 if ((sc = raidcreate(unit)) == NULL)
405 return NULL;
406 mutex_enter(&raid_lock);
407 LIST_INSERT_HEAD(&raids, sc, sc_link);
408 mutex_exit(&raid_lock);
409 return sc;
410 }
411
/*
 * Remove a softc from the global list (under raid_lock) and free it.
 * Caller must ensure the unit is no longer in use.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
419
/*
 * Historical pseudo-device attach entry point; `num' is ignored.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
429
430 int
431 rf_autoconfig(device_t self)
432 {
433 RF_AutoConfig_t *ac_list;
434 RF_ConfigSet_t *config_sets;
435
436 if (!raidautoconfig || raidautoconfigdone == true)
437 return (0);
438
439 /* XXX This code can only be run once. */
440 raidautoconfigdone = true;
441
442 #ifdef __HAVE_CPU_BOOTCONF
443 /*
444 * 0. find the boot device if needed first so we can use it later
445 * this needs to be done before we autoconfigure any raid sets,
446 * because if we use wedges we are not going to be able to open
447 * the boot device later
448 */
449 if (booted_device == NULL)
450 cpu_bootconf();
451 #endif
452 /* 1. locate all RAID components on the system */
453 aprint_debug("Searching for RAID components...\n");
454 ac_list = rf_find_raid_components();
455
456 /* 2. Sort them into their respective sets. */
457 config_sets = rf_create_auto_sets(ac_list);
458
459 /*
460 * 3. Evaluate each set and configure the valid ones.
461 * This gets done in rf_buildroothack().
462 */
463 rf_buildroothack(config_sets);
464
465 return 1;
466 }
467
468 static int
469 rf_containsboot(RF_Raid_t *r, device_t bdv) {
470 const char *bootname;
471 size_t len;
472
473 /* if bdv is NULL, the set can't contain it. exit early. */
474 if (bdv == NULL)
475 return 0;
476
477 bootname = device_xname(bdv);
478 len = strlen(bootname);
479
480 for (int col = 0; col < r->numCol; col++) {
481 const char *devname = r->Disks[col].devname;
482 devname += sizeof("/dev/") - 1;
483 if (strncmp(devname, "dk", 2) == 0) {
484 const char *parent =
485 dkwedge_get_parent_name(r->Disks[col].dev);
486 if (parent != NULL)
487 devname = parent;
488 }
489 if (strncmp(devname, bootname, len) == 0) {
490 struct raid_softc *sc = r->softc;
491 aprint_debug("raid%d includes boot device %s\n",
492 sc->sc_unit, devname);
493 return 1;
494 }
495 }
496 return 0;
497 }
498
499 void
500 rf_buildroothack(RF_ConfigSet_t *config_sets)
501 {
502 RF_ConfigSet_t *cset;
503 RF_ConfigSet_t *next_cset;
504 int num_root;
505 struct raid_softc *sc, *rsc;
506 struct dk_softc *dksc;
507
508 sc = rsc = NULL;
509 num_root = 0;
510 cset = config_sets;
511 while (cset != NULL) {
512 next_cset = cset->next;
513 if (rf_have_enough_components(cset) &&
514 cset->ac->clabel->autoconfigure == 1) {
515 sc = rf_auto_config_set(cset);
516 if (sc != NULL) {
517 aprint_debug("raid%d: configured ok, rootable %d\n",
518 sc->sc_unit, cset->rootable);
519 if (cset->rootable) {
520 rsc = sc;
521 num_root++;
522 }
523 } else {
524 /* The autoconfig didn't work :( */
525 aprint_debug("Autoconfig failed\n");
526 rf_release_all_vps(cset);
527 }
528 } else {
529 /* we're not autoconfiguring this set...
530 release the associated resources */
531 rf_release_all_vps(cset);
532 }
533 /* cleanup */
534 rf_cleanup_config_set(cset);
535 cset = next_cset;
536 }
537 dksc = &rsc->sc_dksc;
538
539 /* if the user has specified what the root device should be
540 then we don't touch booted_device or boothowto... */
541
542 if (rootspec != NULL) {
543 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
544 return;
545 }
546
547 /* we found something bootable... */
548
549 /*
550 * XXX: The following code assumes that the root raid
551 * is the first ('a') partition. This is about the best
552 * we can do with a BSD disklabel, but we might be able
553 * to do better with a GPT label, by setting a specified
554 * attribute to indicate the root partition. We can then
555 * stash the partition number in the r->root_partition
556 * high bits (the bottom 2 bits are already used). For
557 * now we just set booted_partition to 0 when we override
558 * root.
559 */
560 if (num_root == 1) {
561 device_t candidate_root;
562 if (dksc->sc_dkdev.dk_nwedges != 0) {
563 char cname[sizeof(cset->ac->devname)];
564 /* XXX: assume partition 'a' first */
565 snprintf(cname, sizeof(cname), "%s%c",
566 device_xname(dksc->sc_dev), 'a');
567 candidate_root = dkwedge_find_by_wname(cname);
568 DPRINTF("%s: candidate wedge root=%s\n", __func__,
569 cname);
570 if (candidate_root == NULL) {
571 /*
572 * If that is not found, because we don't use
573 * disklabel, return the first dk child
574 * XXX: we can skip the 'a' check above
575 * and always do this...
576 */
577 size_t i = 0;
578 candidate_root = dkwedge_find_by_parent(
579 device_xname(dksc->sc_dev), &i);
580 }
581 DPRINTF("%s: candidate wedge root=%p\n", __func__,
582 candidate_root);
583 } else
584 candidate_root = dksc->sc_dev;
585 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
586 DPRINTF("%s: booted_device=%p root_partition=%d "
587 "contains_boot=%d",
588 __func__, booted_device, rsc->sc_r.root_partition,
589 rf_containsboot(&rsc->sc_r, booted_device));
590 /* XXX the check for booted_device == NULL can probably be
591 * dropped, now that rf_containsboot handles that case.
592 */
593 if (booted_device == NULL ||
594 rsc->sc_r.root_partition == 1 ||
595 rf_containsboot(&rsc->sc_r, booted_device)) {
596 booted_device = candidate_root;
597 booted_method = "raidframe/single";
598 booted_partition = 0; /* XXX assume 'a' */
599 }
600 } else if (num_root > 1) {
601 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
602 booted_device);
603
604 /*
605 * Maybe the MD code can help. If it cannot, then
606 * setroot() will discover that we have no
607 * booted_device and will ask the user if nothing was
608 * hardwired in the kernel config file
609 */
610 if (booted_device == NULL)
611 return;
612
613 num_root = 0;
614 mutex_enter(&raid_lock);
615 LIST_FOREACH(sc, &raids, sc_link) {
616 RF_Raid_t *r = &sc->sc_r;
617 if (r->valid == 0)
618 continue;
619
620 if (r->root_partition == 0)
621 continue;
622
623 if (rf_containsboot(r, booted_device)) {
624 num_root++;
625 rsc = sc;
626 dksc = &rsc->sc_dksc;
627 }
628 }
629 mutex_exit(&raid_lock);
630
631 if (num_root == 1) {
632 booted_device = dksc->sc_dev;
633 booted_method = "raidframe/multi";
634 booted_partition = 0; /* XXX assume 'a' */
635 } else {
636 /* we can't guess.. require the user to answer... */
637 boothowto |= RB_ASKNAME;
638 }
639 }
640 }
641
642 static int
643 raidsize(dev_t dev)
644 {
645 struct raid_softc *rs;
646 struct dk_softc *dksc;
647 unsigned int unit;
648
649 unit = raidunit(dev);
650 if ((rs = raidget(unit, false)) == NULL)
651 return -1;
652 dksc = &rs->sc_dksc;
653
654 if ((rs->sc_flags & RAIDF_INITED) == 0)
655 return -1;
656
657 return dk_size(dksc, dev);
658 }
659
660 static int
661 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
662 {
663 unsigned int unit;
664 struct raid_softc *rs;
665 struct dk_softc *dksc;
666
667 unit = raidunit(dev);
668 if ((rs = raidget(unit, false)) == NULL)
669 return ENXIO;
670 dksc = &rs->sc_dksc;
671
672 if ((rs->sc_flags & RAIDF_INITED) == 0)
673 return ENODEV;
674
675 /*
676 Note that blkno is relative to this particular partition.
677 By adding adding RF_PROTECTED_SECTORS, we get a value that
678 is relative to the partition used for the underlying component.
679 */
680 blkno += RF_PROTECTED_SECTORS;
681
682 return dk_dump(dksc, dev, blkno, va, size);
683 }
684
/*
 * Write `nblk' blocks of dump data at `blkno' directly to one live
 * component of a RAID 1 set (the only layout supported for dumping:
 * one data column, one parity column).  Selects a target component
 * and calls that component's block-device d_dump routine.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	/* look up the component's block-device switch by its dev_t */
	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
790
791 /* ARGSUSED */
792 static int
793 raidopen(dev_t dev, int flags, int fmt,
794 struct lwp *l)
795 {
796 int unit = raidunit(dev);
797 struct raid_softc *rs;
798 struct dk_softc *dksc;
799 int error = 0;
800 int part, pmask;
801
802 if ((rs = raidget(unit, true)) == NULL)
803 return ENXIO;
804 if ((error = raidlock(rs)) != 0)
805 return (error);
806
807 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
808 error = EBUSY;
809 goto bad;
810 }
811
812 dksc = &rs->sc_dksc;
813
814 part = DISKPART(dev);
815 pmask = (1 << part);
816
817 if (!DK_BUSY(dksc, pmask) &&
818 ((rs->sc_flags & RAIDF_INITED) != 0)) {
819 /* First one... mark things as dirty... Note that we *MUST*
820 have done a configure before this. I DO NOT WANT TO BE
821 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
822 THAT THEY BELONG TOGETHER!!!!! */
823 /* XXX should check to see if we're only open for reading
824 here... If so, we needn't do this, but then need some
825 other way of keeping track of what's happened.. */
826
827 rf_markalldirty(&rs->sc_r);
828 }
829
830 if ((rs->sc_flags & RAIDF_INITED) != 0)
831 error = dk_open(dksc, dev, flags, fmt, l);
832
833 bad:
834 raidunlock(rs);
835
836 return (error);
837
838
839 }
840
841 static int
842 raid_lastclose(device_t self)
843 {
844 struct raid_softc *rs = raidsoftc(self);
845
846 /* Last one... device is not unconfigured yet.
847 Device shutdown has taken care of setting the
848 clean bits if RAIDF_INITED is not set
849 mark things as clean... */
850
851 rf_update_component_labels(&rs->sc_r,
852 RF_FINAL_COMPONENT_UPDATE);
853
854 /* pass to unlocked code */
855 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
856 rs->sc_flags |= RAIDF_DETACH;
857
858 return 0;
859 }
860
861 /* ARGSUSED */
862 static int
863 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
864 {
865 int unit = raidunit(dev);
866 struct raid_softc *rs;
867 struct dk_softc *dksc;
868 cfdata_t cf;
869 int error = 0, do_detach = 0, do_put = 0;
870
871 if ((rs = raidget(unit, false)) == NULL)
872 return ENXIO;
873 dksc = &rs->sc_dksc;
874
875 if ((error = raidlock(rs)) != 0)
876 return (error);
877
878 if ((rs->sc_flags & RAIDF_INITED) != 0) {
879 error = dk_close(dksc, dev, flags, fmt, l);
880 if ((rs->sc_flags & RAIDF_DETACH) != 0)
881 do_detach = 1;
882 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
883 do_put = 1;
884
885 raidunlock(rs);
886
887 if (do_detach) {
888 /* free the pseudo device attach bits */
889 cf = device_cfdata(dksc->sc_dev);
890 error = config_detach(dksc->sc_dev, 0);
891 if (error == 0)
892 free(cf, M_RAIDFRAME);
893 } else if (do_put) {
894 raidput(rs);
895 }
896
897 return (error);
898
899 }
900
/*
 * Prod the per-array I/O completion thread: signal iodone_cv under
 * iodone_lock so queued work gets picked up.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
908
909 static void
910 raidstrategy(struct buf *bp)
911 {
912 unsigned int unit;
913 struct raid_softc *rs;
914 struct dk_softc *dksc;
915 RF_Raid_t *raidPtr;
916
917 unit = raidunit(bp->b_dev);
918 if ((rs = raidget(unit, false)) == NULL) {
919 bp->b_error = ENXIO;
920 goto fail;
921 }
922 if ((rs->sc_flags & RAIDF_INITED) == 0) {
923 bp->b_error = ENXIO;
924 goto fail;
925 }
926 dksc = &rs->sc_dksc;
927 raidPtr = &rs->sc_r;
928
929 /* Queue IO only */
930 if (dk_strategy_defer(dksc, bp))
931 goto done;
932
933 /* schedule the IO to happen at the next convenient time */
934 raid_wakeup(raidPtr);
935
936 done:
937 return;
938
939 fail:
940 bp->b_resid = bp->b_bcount;
941 biodone(bp);
942 }
943
944 static int
945 raid_diskstart(device_t dev, struct buf *bp)
946 {
947 struct raid_softc *rs = raidsoftc(dev);
948 RF_Raid_t *raidPtr;
949
950 raidPtr = &rs->sc_r;
951 if (!raidPtr->valid) {
952 db1_printf(("raid is not valid..\n"));
953 return ENODEV;
954 }
955
956 /* XXX */
957 bp->b_resid = 0;
958
959 return raiddoaccess(raidPtr, bp);
960 }
961
962 void
963 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
964 {
965 struct raid_softc *rs;
966 struct dk_softc *dksc;
967
968 rs = raidPtr->softc;
969 dksc = &rs->sc_dksc;
970
971 dk_done(dksc, bp);
972
973 rf_lock_mutex2(raidPtr->mutex);
974 raidPtr->openings++;
975 rf_unlock_mutex2(raidPtr->mutex);
976
977 /* schedule more IO */
978 raid_wakeup(raidPtr);
979 }
980
981 /* ARGSUSED */
982 static int
983 raidread(dev_t dev, struct uio *uio, int flags)
984 {
985 int unit = raidunit(dev);
986 struct raid_softc *rs;
987
988 if ((rs = raidget(unit, false)) == NULL)
989 return ENXIO;
990
991 if ((rs->sc_flags & RAIDF_INITED) == 0)
992 return (ENXIO);
993
994 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
995
996 }
997
998 /* ARGSUSED */
999 static int
1000 raidwrite(dev_t dev, struct uio *uio, int flags)
1001 {
1002 int unit = raidunit(dev);
1003 struct raid_softc *rs;
1004
1005 if ((rs = raidget(unit, false)) == NULL)
1006 return ENXIO;
1007
1008 if ((rs->sc_flags & RAIDF_INITED) == 0)
1009 return (ENXIO);
1010
1011 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1012
1013 }
1014
/*
 * Unconfigure a raid unit (caller holds the unit lock, hence
 * "unlocked" here refers to raid_lock not being needed).  Refuses
 * while the unit is open or a recon/parity-rewrite/copyback is in
 * progress.  Teardown order matters: shut down RAIDframe first, then
 * drain and free the buffer queue, then detach the disk.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* busy check: open partitions or background operations running */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1052
/*
 * Return true if `cmd' is an ioctl that requires the unit to be
 * configured (RAIDF_INITED) and the unit indeed is; false for all
 * other commands, which may run on an unconfigured unit.
 */
static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
	switch (cmd) {
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_FAIL_DISK80:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_GET_COMPONENT_LABEL80:
	case RAIDFRAME_GET_INFO:
#ifdef RAID_COMPAT32
	case RAIDFRAME_GET_INFO32:
#endif
	case RAIDFRAME_GET_INFO50:
	case RAIDFRAME_GET_INFO80:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_SET_ROOT:
		return (rs->sc_flags & RAIDF_INITED) != 0;
	}
	/* command does not require a configured unit */
	return false;
}
1100
1101 /*
1102 * Really this should be done as part of the default in the ioctl
1103 * switch like other compat code, but it is too messy to do that
1104 * right now, so we list all the compat ioctls we know about,
1105 * and load appropriately.
1106 *
1107 * XXX[1] what about combinations of compat32 and compat80 ioctls?
1108 * XXX[2] what about autoloading the compat32 code? Is there a compat32
1109 * ioctl module? Should there be one?
1110 */
1111 static int
1112 rf_handle_compat(struct raid_softc *rs, int unit, u_long cmd, void *data,
1113 RF_Config_t **k_cfg)
1114 {
1115 RF_Raid_t *raidPtr = &rs->sc_r;
1116 int retcode = EPASSTHROUGH;
1117 switch (cmd) {
1118 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
1119 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
1120 case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
1121 case RAIDFRAME_CONFIGURE80:
1122 case RAIDFRAME_FAIL_DISK80:
1123 case RAIDFRAME_GET_COMPONENT_LABEL80:
1124 case RAIDFRAME_GET_INFO80:
1125 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1126 MODULE_CALL_HOOK(raidframe_ioctl_80_hook, (cmd,
1127 (rs->sc_flags & RAIDF_INITED), raidPtr, unit, data, k_cfg),
1128 enosys(), retcode);
1129 break;
1130 case RAIDFRAME_CONFIGURE50:
1131 case RAIDFRAME_GET_INFO50:
1132 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1133 MODULE_CALL_HOOK(raidframe_ioctl_50_hook, (cmd,
1134 (rs->sc_flags & RAIDF_INITED), raidPtr, unit, data, k_cfg),
1135 enosys(), retcode);
1136 break;
1137 default:
1138 break;
1139 }
1140 return retcode;
1141 }
1142
/*
 * Administratively fail the component in column `rr->col' and start a
 * reconstruction thread for it.
 *
 * Returns 0 once the recon thread has been created, EINVAL if the
 * request is not permitted (RAID 0, bad column, reconstruction already
 * running, other components already failed, or the target is spared),
 * and ENOMEM if the internal request copy cannot be allocated.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	/* The status checks below must be made atomically with respect
	   to concurrent recon/failure updates, hence the mutex. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	/* NOTE(review): rrint is presumably owned and freed by
	   rf_ReconThread; if thread creation fails it appears to be
	   leaked -- confirm against rf_ReconThread. */
	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1191
1192 static int
1193 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1194 {
1195 int unit = raidunit(dev);
1196 int error = 0;
1197 int part, pmask;
1198 struct raid_softc *rs;
1199 struct dk_softc *dksc;
1200 RF_Config_t *k_cfg, *u_cfg;
1201 RF_Raid_t *raidPtr;
1202 RF_RaidDisk_t *diskPtr;
1203 RF_AccTotals_t *totals;
1204 RF_DeviceConfig_t *d_cfg, *ucfgp = data;
1205 u_char *specific_buf;
1206 int retcode = 0;
1207 int column;
1208 /* int raidid; */
1209 struct rf_recon_req_internal *rrint;
1210 RF_ComponentLabel_t *clabel;
1211 RF_ComponentLabel_t *ci_label;
1212 RF_SingleComponent_t *sparePtr,*componentPtr;
1213 RF_SingleComponent_t component;
1214 int d;
1215
1216 if ((rs = raidget(unit, false)) == NULL)
1217 return ENXIO;
1218
1219 dksc = &rs->sc_dksc;
1220 raidPtr = &rs->sc_r;
1221
1222 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1223 (int) DISKPART(dev), (int) unit, cmd));
1224
1225 /* Must be initialized for these... */
1226 if (rf_must_be_initialized(rs, cmd))
1227 return ENXIO;
1228
1229 switch (retcode = rf_handle_compat(rs, unit, cmd, data, &k_cfg)) {
1230 case EPASSTHROUGH:
1231 /* Not compat, keep going */
1232 retcode = 0;
1233 break;
1234 case EAGAIN:
1235 goto config;
1236 default:
1237 /* compat but could not handle it or load the module */
1238 return retcode;
1239 }
1240
1241 switch (cmd) {
1242 /* configure the system */
1243 case RAIDFRAME_CONFIGURE:
1244 #ifdef RAID_COMPAT32
1245 case RAIDFRAME_CONFIGURE32:
1246 #endif
1247 if (raidPtr->valid) {
1248 /* There is a valid RAID set running on this unit! */
1249 printf("raid%d: Device already configured!\n", unit);
1250 return(EINVAL);
1251 }
1252
1253 /* copy-in the configuration information */
1254 /* data points to a pointer to the configuration structure */
1255
1256 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1257 if (k_cfg == NULL) {
1258 return (ENOMEM);
1259 }
1260 #ifdef RAID_COMPAT32
1261 if (cmd == RAIDFRAME_CONFIGURE32 &&
1262 (l->l_proc->p_flag & PK_32) != 0)
1263 MODULE_CALL_HOOK(raidframe_netbsd32_config_hook,
1264 (data, k_cfg), enosys(), retcode);
1265 else
1266 #endif
1267 {
1268 u_cfg = *((RF_Config_t **) data);
1269 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1270 }
1271 if (retcode) {
1272 RF_Free(k_cfg, sizeof(RF_Config_t));
1273 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1274 retcode));
1275 goto no_config;
1276 }
1277 goto config;
1278 config:
1279 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1280
1281 /* allocate a buffer for the layout-specific data, and copy it
1282 * in */
1283 if (k_cfg->layoutSpecificSize) {
1284 if (k_cfg->layoutSpecificSize > 10000) {
1285 /* sanity check */
1286 RF_Free(k_cfg, sizeof(RF_Config_t));
1287 retcode = EINVAL;
1288 goto no_config;
1289 }
1290 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1291 (u_char *));
1292 if (specific_buf == NULL) {
1293 RF_Free(k_cfg, sizeof(RF_Config_t));
1294 retcode = ENOMEM;
1295 goto no_config;
1296 }
1297 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1298 k_cfg->layoutSpecificSize);
1299 if (retcode) {
1300 RF_Free(k_cfg, sizeof(RF_Config_t));
1301 RF_Free(specific_buf,
1302 k_cfg->layoutSpecificSize);
1303 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1304 retcode));
1305 goto no_config;
1306 }
1307 } else
1308 specific_buf = NULL;
1309 k_cfg->layoutSpecific = specific_buf;
1310
1311 /* should do some kind of sanity check on the configuration.
1312 * Store the sum of all the bytes in the last byte? */
1313
1314 /* configure the system */
1315
1316 /*
1317 * Clear the entire RAID descriptor, just to make sure
1318 * there is no stale data left in the case of a
1319 * reconfiguration
1320 */
1321 memset(raidPtr, 0, sizeof(*raidPtr));
1322 raidPtr->softc = rs;
1323 raidPtr->raidid = unit;
1324
1325 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1326
1327 if (retcode == 0) {
1328
1329 /* allow this many simultaneous IO's to
1330 this RAID device */
1331 raidPtr->openings = RAIDOUTSTANDING;
1332
1333 raidinit(rs);
1334 raid_wakeup(raidPtr);
1335 rf_markalldirty(raidPtr);
1336 }
1337 /* free the buffers. No return code here. */
1338 if (k_cfg->layoutSpecificSize) {
1339 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1340 }
1341 RF_Free(k_cfg, sizeof(RF_Config_t));
1342
1343 no_config:
1344 /*
1345 * If configuration failed, set sc_flags so that we
1346 * will detach the device when we close it.
1347 */
1348 if (retcode != 0)
1349 rs->sc_flags |= RAIDF_SHUTDOWN;
1350 return (retcode);
1351
1352 /* shutdown the system */
1353 case RAIDFRAME_SHUTDOWN:
1354
1355 part = DISKPART(dev);
1356 pmask = (1 << part);
1357
1358 if ((error = raidlock(rs)) != 0)
1359 return (error);
1360
1361 if (DK_BUSY(dksc, pmask) ||
1362 raidPtr->recon_in_progress != 0 ||
1363 raidPtr->parity_rewrite_in_progress != 0 ||
1364 raidPtr->copyback_in_progress != 0)
1365 retcode = EBUSY;
1366 else {
1367 /* detach and free on close */
1368 rs->sc_flags |= RAIDF_SHUTDOWN;
1369 retcode = 0;
1370 }
1371
1372 raidunlock(rs);
1373
1374 return (retcode);
1375 case RAIDFRAME_GET_COMPONENT_LABEL:
1376 return rf_get_component_label(raidPtr, data);
1377
1378 #if 0
1379 case RAIDFRAME_SET_COMPONENT_LABEL:
1380 clabel = (RF_ComponentLabel_t *) data;
1381
1382 /* XXX check the label for valid stuff... */
1383 /* Note that some things *should not* get modified --
1384 the user should be re-initing the labels instead of
1385 trying to patch things.
1386 */
1387
1388 raidid = raidPtr->raidid;
1389 #ifdef DEBUG
1390 printf("raid%d: Got component label:\n", raidid);
1391 printf("raid%d: Version: %d\n", raidid, clabel->version);
1392 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1393 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1394 printf("raid%d: Column: %d\n", raidid, clabel->column);
1395 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1396 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1397 printf("raid%d: Status: %d\n", raidid, clabel->status);
1398 #endif /* DEBUG */
1399 clabel->row = 0;
1400 column = clabel->column;
1401
1402 if ((column < 0) || (column >= raidPtr->numCol)) {
1403 return(EINVAL);
1404 }
1405
1406 /* XXX this isn't allowed to do anything for now :-) */
1407
1408 /* XXX and before it is, we need to fill in the rest
1409 of the fields!?!?!?! */
1410 memcpy(raidget_component_label(raidPtr, column),
1411 clabel, sizeof(*clabel));
1412 raidflush_component_label(raidPtr, column);
1413 return (0);
1414 #endif /* 0 */
1415
1416 case RAIDFRAME_INIT_LABELS:
1417 clabel = (RF_ComponentLabel_t *) data;
1418 /*
1419 we only want the serial number from
1420 the above. We get all the rest of the information
1421 from the config that was used to create this RAID
1422 set.
1423 */
1424
1425 raidPtr->serial_number = clabel->serial_number;
1426
1427 for(column=0;column<raidPtr->numCol;column++) {
1428 diskPtr = &raidPtr->Disks[column];
1429 if (!RF_DEAD_DISK(diskPtr->status)) {
1430 ci_label = raidget_component_label(raidPtr,
1431 column);
1432 /* Zeroing this is important. */
1433 memset(ci_label, 0, sizeof(*ci_label));
1434 raid_init_component_label(raidPtr, ci_label);
1435 ci_label->serial_number =
1436 raidPtr->serial_number;
1437 ci_label->row = 0; /* we dont' pretend to support more */
1438 rf_component_label_set_partitionsize(ci_label,
1439 diskPtr->partitionSize);
1440 ci_label->column = column;
1441 raidflush_component_label(raidPtr, column);
1442 }
1443 /* XXXjld what about the spares? */
1444 }
1445
1446 return (retcode);
1447 case RAIDFRAME_SET_AUTOCONFIG:
1448 d = rf_set_autoconfig(raidPtr, *(int *) data);
1449 printf("raid%d: New autoconfig value is: %d\n",
1450 raidPtr->raidid, d);
1451 *(int *) data = d;
1452 return (retcode);
1453
1454 case RAIDFRAME_SET_ROOT:
1455 d = rf_set_rootpartition(raidPtr, *(int *) data);
1456 printf("raid%d: New rootpartition value is: %d\n",
1457 raidPtr->raidid, d);
1458 *(int *) data = d;
1459 return (retcode);
1460
1461 /* initialize all parity */
1462 case RAIDFRAME_REWRITEPARITY:
1463
1464 if (raidPtr->Layout.map->faultsTolerated == 0) {
1465 /* Parity for RAID 0 is trivially correct */
1466 raidPtr->parity_good = RF_RAID_CLEAN;
1467 return(0);
1468 }
1469
1470 if (raidPtr->parity_rewrite_in_progress == 1) {
1471 /* Re-write is already in progress! */
1472 return(EINVAL);
1473 }
1474
1475 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1476 rf_RewriteParityThread,
1477 raidPtr,"raid_parity");
1478 return (retcode);
1479
1480
1481 case RAIDFRAME_ADD_HOT_SPARE:
1482 sparePtr = (RF_SingleComponent_t *) data;
1483 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1484 retcode = rf_add_hot_spare(raidPtr, &component);
1485 return(retcode);
1486
1487 case RAIDFRAME_REMOVE_HOT_SPARE:
1488 return(retcode);
1489
1490 case RAIDFRAME_DELETE_COMPONENT:
1491 componentPtr = (RF_SingleComponent_t *)data;
1492 memcpy( &component, componentPtr,
1493 sizeof(RF_SingleComponent_t));
1494 retcode = rf_delete_component(raidPtr, &component);
1495 return(retcode);
1496
1497 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1498 componentPtr = (RF_SingleComponent_t *)data;
1499 memcpy( &component, componentPtr,
1500 sizeof(RF_SingleComponent_t));
1501 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1502 return(retcode);
1503
1504 case RAIDFRAME_REBUILD_IN_PLACE:
1505
1506 if (raidPtr->Layout.map->faultsTolerated == 0) {
1507 /* Can't do this on a RAID 0!! */
1508 return(EINVAL);
1509 }
1510
1511 if (raidPtr->recon_in_progress == 1) {
1512 /* a reconstruct is already in progress! */
1513 return(EINVAL);
1514 }
1515
1516 componentPtr = (RF_SingleComponent_t *) data;
1517 memcpy( &component, componentPtr,
1518 sizeof(RF_SingleComponent_t));
1519 component.row = 0; /* we don't support any more */
1520 column = component.column;
1521
1522 if ((column < 0) || (column >= raidPtr->numCol)) {
1523 return(EINVAL);
1524 }
1525
1526 rf_lock_mutex2(raidPtr->mutex);
1527 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1528 (raidPtr->numFailures > 0)) {
1529 /* XXX 0 above shouldn't be constant!!! */
1530 /* some component other than this has failed.
1531 Let's not make things worse than they already
1532 are... */
1533 printf("raid%d: Unable to reconstruct to disk at:\n",
1534 raidPtr->raidid);
1535 printf("raid%d: Col: %d Too many failures.\n",
1536 raidPtr->raidid, column);
1537 rf_unlock_mutex2(raidPtr->mutex);
1538 return (EINVAL);
1539 }
1540 if (raidPtr->Disks[column].status ==
1541 rf_ds_reconstructing) {
1542 printf("raid%d: Unable to reconstruct to disk at:\n",
1543 raidPtr->raidid);
1544 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1545
1546 rf_unlock_mutex2(raidPtr->mutex);
1547 return (EINVAL);
1548 }
1549 if (raidPtr->Disks[column].status == rf_ds_spared) {
1550 rf_unlock_mutex2(raidPtr->mutex);
1551 return (EINVAL);
1552 }
1553 rf_unlock_mutex2(raidPtr->mutex);
1554
1555 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1556 if (rrint == NULL)
1557 return(ENOMEM);
1558
1559 rrint->col = column;
1560 rrint->raidPtr = raidPtr;
1561
1562 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1563 rf_ReconstructInPlaceThread,
1564 rrint, "raid_reconip");
1565 return(retcode);
1566
1567 #ifdef RAID_COMPAT32
1568 case RAIDFRAME_GET_INFO32:
1569 if (!raidframe_netbsd32_config_hook.hooked)
1570 return ENOSYS;
1571 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1572 /*FALLTHROUGH*/
1573 #endif
1574 case RAIDFRAME_GET_INFO:
1575 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1576 (RF_DeviceConfig_t *));
1577 if (d_cfg == NULL)
1578 return ENOMEM;
1579 retcode = rf_get_info(raidPtr, d_cfg);
1580 if (retcode == 0) {
1581 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1582 }
1583 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1584
1585 return retcode;
1586
1587 case RAIDFRAME_CHECK_PARITY:
1588 *(int *) data = raidPtr->parity_good;
1589 return (0);
1590
1591 case RAIDFRAME_PARITYMAP_STATUS:
1592 if (rf_paritymap_ineligible(raidPtr))
1593 return EINVAL;
1594 rf_paritymap_status(raidPtr->parity_map,
1595 (struct rf_pmstat *)data);
1596 return 0;
1597
1598 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1599 if (rf_paritymap_ineligible(raidPtr))
1600 return EINVAL;
1601 if (raidPtr->parity_map == NULL)
1602 return ENOENT; /* ??? */
1603 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1604 (struct rf_pmparams *)data, 1))
1605 return EINVAL;
1606 return 0;
1607
1608 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1609 if (rf_paritymap_ineligible(raidPtr))
1610 return EINVAL;
1611 *(int *) data = rf_paritymap_get_disable(raidPtr);
1612 return 0;
1613
1614 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1615 if (rf_paritymap_ineligible(raidPtr))
1616 return EINVAL;
1617 rf_paritymap_set_disable(raidPtr, *(int *)data);
1618 /* XXX should errors be passed up? */
1619 return 0;
1620
1621 case RAIDFRAME_RESET_ACCTOTALS:
1622 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1623 return (0);
1624
1625 case RAIDFRAME_GET_ACCTOTALS:
1626 totals = (RF_AccTotals_t *) data;
1627 *totals = raidPtr->acc_totals;
1628 return 0;
1629
1630 case RAIDFRAME_KEEP_ACCTOTALS:
1631 raidPtr->keep_acc_totals = *(int *)data;
1632 return 0;
1633
1634 case RAIDFRAME_GET_SIZE:
1635 *(int *) data = raidPtr->totalSectors;
1636 return 0;
1637
1638 case RAIDFRAME_FAIL_DISK:
1639 return rf_fail_disk(raidPtr, data);
1640
1641 /* invoke a copyback operation after recon on whatever disk
1642 * needs it, if any */
1643 case RAIDFRAME_COPYBACK:
1644
1645 if (raidPtr->Layout.map->faultsTolerated == 0) {
1646 /* This makes no sense on a RAID 0!! */
1647 return(EINVAL);
1648 }
1649
1650 if (raidPtr->copyback_in_progress == 1) {
1651 /* Copyback is already in progress! */
1652 return(EINVAL);
1653 }
1654
1655 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1656 rf_CopybackThread,
1657 raidPtr,"raid_copyback");
1658 return (retcode);
1659
1660 /* return the percentage completion of reconstruction */
1661 case RAIDFRAME_CHECK_RECON_STATUS:
1662 if (raidPtr->Layout.map->faultsTolerated == 0) {
1663 /* This makes no sense on a RAID 0, so tell the
1664 user it's done. */
1665 *(int *) data = 100;
1666 return(0);
1667 }
1668 if (raidPtr->status != rf_rs_reconstructing)
1669 *(int *) data = 100;
1670 else {
1671 if (raidPtr->reconControl->numRUsTotal > 0) {
1672 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1673 } else {
1674 *(int *) data = 0;
1675 }
1676 }
1677 return (0);
1678 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1679 rf_check_recon_status_ext(raidPtr, data);
1680 return (0);
1681
1682 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1683 if (raidPtr->Layout.map->faultsTolerated == 0) {
1684 /* This makes no sense on a RAID 0, so tell the
1685 user it's done. */
1686 *(int *) data = 100;
1687 return(0);
1688 }
1689 if (raidPtr->parity_rewrite_in_progress == 1) {
1690 *(int *) data = 100 *
1691 raidPtr->parity_rewrite_stripes_done /
1692 raidPtr->Layout.numStripe;
1693 } else {
1694 *(int *) data = 100;
1695 }
1696 return (0);
1697
1698 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1699 rf_check_parityrewrite_status_ext(raidPtr, data);
1700 return (0);
1701
1702 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1703 if (raidPtr->Layout.map->faultsTolerated == 0) {
1704 /* This makes no sense on a RAID 0 */
1705 *(int *) data = 100;
1706 return(0);
1707 }
1708 if (raidPtr->copyback_in_progress == 1) {
1709 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1710 raidPtr->Layout.numStripe;
1711 } else {
1712 *(int *) data = 100;
1713 }
1714 return (0);
1715
1716 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1717 rf_check_copyback_status_ext(raidPtr, data);
1718 return 0;
1719
1720 case RAIDFRAME_SET_LAST_UNIT:
1721 for (column = 0; column < raidPtr->numCol; column++)
1722 if (raidPtr->Disks[column].status != rf_ds_optimal)
1723 return EBUSY;
1724
1725 for (column = 0; column < raidPtr->numCol; column++) {
1726 clabel = raidget_component_label(raidPtr, column);
1727 clabel->last_unit = *(int *)data;
1728 raidflush_component_label(raidPtr, column);
1729 }
1730 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1731 return 0;
1732
1733 /* the sparetable daemon calls this to wait for the kernel to
1734 * need a spare table. this ioctl does not return until a
1735 * spare table is needed. XXX -- calling mpsleep here in the
1736 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1737 * -- I should either compute the spare table in the kernel,
1738 * or have a different -- XXX XXX -- interface (a different
1739 * character device) for delivering the table -- XXX */
1740 #if 0
1741 case RAIDFRAME_SPARET_WAIT:
1742 rf_lock_mutex2(rf_sparet_wait_mutex);
1743 while (!rf_sparet_wait_queue)
1744 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1745 waitreq = rf_sparet_wait_queue;
1746 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1747 rf_unlock_mutex2(rf_sparet_wait_mutex);
1748
1749 /* structure assignment */
1750 *((RF_SparetWait_t *) data) = *waitreq;
1751
1752 RF_Free(waitreq, sizeof(*waitreq));
1753 return (0);
1754
1755 /* wakes up a process waiting on SPARET_WAIT and puts an error
1756 * code in it that will cause the dameon to exit */
1757 case RAIDFRAME_ABORT_SPARET_WAIT:
1758 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1759 waitreq->fcol = -1;
1760 rf_lock_mutex2(rf_sparet_wait_mutex);
1761 waitreq->next = rf_sparet_wait_queue;
1762 rf_sparet_wait_queue = waitreq;
1763 rf_broadcast_conf2(rf_sparet_wait_cv);
1764 rf_unlock_mutex2(rf_sparet_wait_mutex);
1765 return (0);
1766
1767 /* used by the spare table daemon to deliver a spare table
1768 * into the kernel */
1769 case RAIDFRAME_SEND_SPARET:
1770
1771 /* install the spare table */
1772 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1773
1774 /* respond to the requestor. the return status of the spare
1775 * table installation is passed in the "fcol" field */
1776 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1777 waitreq->fcol = retcode;
1778 rf_lock_mutex2(rf_sparet_wait_mutex);
1779 waitreq->next = rf_sparet_resp_queue;
1780 rf_sparet_resp_queue = waitreq;
1781 rf_broadcast_cond2(rf_sparet_resp_cv);
1782 rf_unlock_mutex2(rf_sparet_wait_mutex);
1783
1784 return (retcode);
1785 #endif
1786
1787 default:
1788 break; /* fall through to the os-specific code below */
1789
1790 }
1791
1792 if (!raidPtr->valid)
1793 return (EINVAL);
1794
1795 /*
1796 * Add support for "regular" device ioctls here.
1797 */
1798
1799 switch (cmd) {
1800 case DIOCGCACHE:
1801 retcode = rf_get_component_caches(raidPtr, (int *)data);
1802 break;
1803
1804 case DIOCCACHESYNC:
1805 retcode = rf_sync_component_caches(raidPtr);
1806 break;
1807
1808 default:
1809 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1810 break;
1811 }
1812
1813 return (retcode);
1814
1815 }
1816
1817
1818 /* raidinit -- complete the rest of the initialization for the
1819 RAIDframe device. */
1820
1821
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		/* NOTE(review): returning here leaves RAIDF_INITED clear,
		   so the unit stays unusable; the caller gets no error. */
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* I/O queue for the unit; drained and freed on shutdown. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Scan for wedges (dk partitions) on the new device. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1877
1878 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1879 /* wake up the daemon & tell it to get us a spare table
1880 * XXX
1881 * the entries in the queues should be tagged with the raidPtr
1882 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1884 * XXX
1885 *
1886 * XXX This code is not currently used. GO
1887 */
/*
 * Hand the spare-table request `req' to the user-space daemon and block
 * until a response arrives.  The request is pushed onto
 * rf_sparet_wait_queue and the daemon is woken; the daemon's answer
 * comes back on rf_sparet_resp_queue (see the RAIDFRAME_SEND_SPARET
 * ioctl).  Returns the "fcol" status from the response.
 *
 * NOTE(review): the response dequeued below is assumed to correspond to
 * this request; with two concurrent recons the match-up is not
 * guaranteed (see the XXX in the comment above this function).
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1911 #endif
1912
1913 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1914 * bp & passes it down.
1915 * any calls originating in the kernel must use non-blocking I/O
1916 * do some extra sanity checking to return "appropriate" error values for
1917 * certain conditions (to make some standard utilities work)
1918 *
1919 * Formerly known as: rf_DoAccessKernel
1920 */
1921 void
1922 raidstart(RF_Raid_t *raidPtr)
1923 {
1924 struct raid_softc *rs;
1925 struct dk_softc *dksc;
1926
1927 rs = raidPtr->softc;
1928 dksc = &rs->sc_dksc;
1929 /* quick check to see if anything has died recently */
1930 rf_lock_mutex2(raidPtr->mutex);
1931 if (raidPtr->numNewFailures > 0) {
1932 rf_unlock_mutex2(raidPtr->mutex);
1933 rf_update_component_labels(raidPtr,
1934 RF_NORMAL_COMPONENT_UPDATE);
1935 rf_lock_mutex2(raidPtr->mutex);
1936 raidPtr->numNewFailures--;
1937 }
1938 rf_unlock_mutex2(raidPtr->mutex);
1939
1940 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1941 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1942 return;
1943 }
1944
1945 dk_start(dksc, NULL);
1946 }
1947
1948 static int
1949 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1950 {
1951 RF_SectorCount_t num_blocks, pb, sum;
1952 RF_RaidAddr_t raid_addr;
1953 daddr_t blocknum;
1954 int do_async;
1955 int rc;
1956
1957 rf_lock_mutex2(raidPtr->mutex);
1958 if (raidPtr->openings == 0) {
1959 rf_unlock_mutex2(raidPtr->mutex);
1960 return EAGAIN;
1961 }
1962 rf_unlock_mutex2(raidPtr->mutex);
1963
1964 blocknum = bp->b_rawblkno;
1965
1966 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1967 (int) blocknum));
1968
1969 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1970 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1971
1972 /* *THIS* is where we adjust what block we're going to...
1973 * but DO NOT TOUCH bp->b_blkno!!! */
1974 raid_addr = blocknum;
1975
1976 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1977 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1978 sum = raid_addr + num_blocks + pb;
1979 if (1 || rf_debugKernelAccess) {
1980 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1981 (int) raid_addr, (int) sum, (int) num_blocks,
1982 (int) pb, (int) bp->b_resid));
1983 }
1984 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1985 || (sum < num_blocks) || (sum < pb)) {
1986 rc = ENOSPC;
1987 goto done;
1988 }
1989 /*
1990 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1991 */
1992
1993 if (bp->b_bcount & raidPtr->sectorMask) {
1994 rc = ENOSPC;
1995 goto done;
1996 }
1997 db1_printf(("Calling DoAccess..\n"));
1998
1999
2000 rf_lock_mutex2(raidPtr->mutex);
2001 raidPtr->openings--;
2002 rf_unlock_mutex2(raidPtr->mutex);
2003
2004 /*
2005 * Everything is async.
2006 */
2007 do_async = 1;
2008
2009 /* don't ever condition on bp->b_flags & B_WRITE.
2010 * always condition on B_READ instead */
2011
2012 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2013 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2014 do_async, raid_addr, num_blocks,
2015 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2016
2017 done:
2018 return rc;
2019 }
2020
2021 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2022
/*
 * Dispatch one RAIDframe disk-queue request to the underlying
 * component device.  NOP requests are completed immediately through
 * KernelWakeupFunc(); reads and writes are set up with InitBP() and
 * handed to bdev_strategy().  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the request synchronously; no real I/O done. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp to target the component's vnode/device at the
		 * requested sector range, completing via KernelWakeupFunc. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with an I/O invoked from
2097 kernel code.
2098 */
/*
 * biodone callback for component I/O issued by rf_DispatchKernelIO().
 * Records the I/O's error status in the request, marks the component
 * failed when the error would not break the set beyond its fault
 * tolerance, and hands the request to the raidio thread via the
 * iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* All bookkeeping below is done under the array's iodone lock. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2165
2166
2167 /*
2168 * initialize a buf structure for doing an I/O in the kernel.
2169 */
2170 static void
2171 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2172 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2173 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2174 struct proc *b_proc)
2175 {
2176 /* bp->b_flags = B_PHYS | rw_flag; */
2177 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2178 bp->b_oflags = 0;
2179 bp->b_cflags = 0;
2180 bp->b_bcount = numSect << logBytesPerSector;
2181 bp->b_bufsize = bp->b_bcount;
2182 bp->b_error = 0;
2183 bp->b_dev = dev;
2184 bp->b_data = bf;
2185 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2186 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2187 if (bp->b_bcount == 0) {
2188 panic("bp->b_bcount is zero in InitBP!!");
2189 }
2190 bp->b_proc = b_proc;
2191 bp->b_iodone = cbFunc;
2192 bp->b_private = cbArg;
2193 }
2194
2195 /*
2196 * Wait interruptibly for an exclusive lock.
2197 *
2198 * XXX
2199 * Several drivers do this; it should be abstracted and made MP-safe.
2200 * (Hmm... where have we seen this warning before :-> GO )
2201 */
2202 static int
2203 raidlock(struct raid_softc *rs)
2204 {
2205 int error;
2206
2207 error = 0;
2208 mutex_enter(&rs->sc_mutex);
2209 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2210 rs->sc_flags |= RAIDF_WANTED;
2211 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2212 if (error != 0)
2213 goto done;
2214 }
2215 rs->sc_flags |= RAIDF_LOCKED;
2216 done:
2217 mutex_exit(&rs->sc_mutex);
2218 return (error);
2219 }
2220 /*
2221 * Unlock and wake up any waiters.
2222 */
2223 static void
2224 raidunlock(struct raid_softc *rs)
2225 {
2226
2227 mutex_enter(&rs->sc_mutex);
2228 rs->sc_flags &= ~RAIDF_LOCKED;
2229 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2230 rs->sc_flags &= ~RAIDF_WANTED;
2231 cv_broadcast(&rs->sc_cv);
2232 }
2233 mutex_exit(&rs->sc_mutex);
2234 }
2235
2236
2237 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2238 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2239 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2240
/*
 * Byte offset of the component label/info area on each component.
 * Currently a fixed constant; kept as a function so the policy can be
 * changed in one place.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2247
2248 static daddr_t
2249 rf_component_info_size(unsigned secsize)
2250 {
2251 daddr_t info_size;
2252
2253 KASSERT(secsize);
2254 if (secsize > RF_COMPONENT_INFO_SIZE)
2255 info_size = secsize;
2256 else
2257 info_size = RF_COMPONENT_INFO_SIZE;
2258
2259 return info_size;
2260 }
2261
2262 static daddr_t
2263 rf_parity_map_offset(RF_Raid_t *raidPtr)
2264 {
2265 daddr_t map_offset;
2266
2267 KASSERT(raidPtr->bytesPerSector);
2268 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2269 map_offset = raidPtr->bytesPerSector;
2270 else
2271 map_offset = RF_COMPONENT_INFO_SIZE;
2272 map_offset += rf_component_info_offset();
2273
2274 return map_offset;
2275 }
2276
2277 static daddr_t
2278 rf_parity_map_size(RF_Raid_t *raidPtr)
2279 {
2280 daddr_t map_size;
2281
2282 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2283 map_size = raidPtr->bytesPerSector;
2284 else
2285 map_size = RF_PARITY_MAP_SIZE;
2286
2287 return map_size;
2288 }
2289
2290 int
2291 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2292 {
2293 RF_ComponentLabel_t *clabel;
2294
2295 clabel = raidget_component_label(raidPtr, col);
2296 clabel->clean = RF_RAID_CLEAN;
2297 raidflush_component_label(raidPtr, col);
2298 return(0);
2299 }
2300
2301
2302 int
2303 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2304 {
2305 RF_ComponentLabel_t *clabel;
2306
2307 clabel = raidget_component_label(raidPtr, col);
2308 clabel->clean = RF_RAID_DIRTY;
2309 raidflush_component_label(raidPtr, col);
2310 return(0);
2311 }
2312
/*
 * Read the on-disk component label for column `col' into the in-core
 * copy kept in raid_cinfo[col].ci_label.  Returns 0 or an error from
 * the underlying read.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2322
/* Return a pointer to the in-core component label for column `col'. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2328
/*
 * Write the in-core component label for column `col' out to disk,
 * stamping it with the set's current mod_counter first (and keeping the
 * parity map's mod count in sync unless parity maps are compiled out).
 * Returns 0 or an error from the underlying write.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2343
2344
/*
 * Read a component label from dev/b_vp into *clabel; thin wrapper that
 * supplies the label's on-disk offset and (sector-size dependent) area
 * size to raidread_component_area().
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2354
2355 /* ARGSUSED */
/*
 * Read dsize bytes at byte offset `offset' from the raw component
 * device into a scratch buffer, then copy the first msize bytes into
 * `data'.  Returns 0 on success or the error from biowait().
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* synchronous read straight through the block device */
	bdev_strategy(bp);
	error = biowait(bp);

	/* only the caller-visible msize bytes are copied out */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2392
2393
/*
 * Write *clabel to the component label area of dev/b_vp; thin wrapper
 * that supplies the on-disk offset and area size to
 * raidwrite_component_area().  Always synchronous (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2403
2404 /* ARGSUSED */
/*
 * Write msize bytes from `data' (zero-padded to dsize) at byte offset
 * `offset' on the raw component device.  If asyncp is nonzero the write
 * is issued B_ASYNC and 0 is returned immediately; otherwise the
 * function waits for completion and returns the I/O error, if any.
 *
 * NOTE(review): in the async case the buffer is not brelse'd here --
 * presumably released at biodone time via B_ASYNC; confirm.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* pad out to dsize so stale data never hits the disk */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2438
/*
 * Write the on-disk parity map `map' to every live component of the
 * set.  Write errors are reported by the lower layer but otherwise
 * ignored here (see XXX below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2456
2457 void
2458 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2459 {
2460 struct rf_paritymap_ondisk tmp;
2461 int c,first;
2462
2463 first=1;
2464 for (c = 0; c < raidPtr->numCol; c++) {
2465 /* Skip dead disks. */
2466 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2467 continue;
2468 raidread_component_area(raidPtr->Disks[c].dev,
2469 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2470 RF_PARITYMAP_NBYTE,
2471 rf_parity_map_offset(raidPtr),
2472 rf_parity_map_size(raidPtr));
2473 if (first) {
2474 memcpy(map, &tmp, sizeof(*map));
2475 first = 0;
2476 } else {
2477 rf_paritymap_merge(map, &tmp);
2478 }
2479 }
2480 }
2481
/*
 * Bump the set's mod counter and mark the component label of every
 * live component (and every in-use spare) as dirty on disk, so that an
 * unclean shutdown can be detected at the next configuration.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2541
2542
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the mod counter, note the current unit number, and flush
 * each label to disk.  When `final' is RF_FINAL_COMPONENT_UPDATE (i.e.
 * at shutdown/unconfigure) and parity is known good, the labels are
 * additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2620
/*
 * Close a component's vnode.  Auto-configured components were opened
 * with VOP_OPEN directly, so they are closed with VOP_CLOSE + vput;
 * manually-configured ones were opened via vn_open and are closed with
 * vn_close under the caller's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}
2636
2637
2638 void
2639 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2640 {
2641 int r,c;
2642 struct vnode *vp;
2643 int acd;
2644
2645
2646 /* We take this opportunity to close the vnodes like we should.. */
2647
2648 for (c = 0; c < raidPtr->numCol; c++) {
2649 vp = raidPtr->raid_cinfo[c].ci_vp;
2650 acd = raidPtr->Disks[c].auto_configured;
2651 rf_close_component(raidPtr, vp, acd);
2652 raidPtr->raid_cinfo[c].ci_vp = NULL;
2653 raidPtr->Disks[c].auto_configured = 0;
2654 }
2655
2656 for (r = 0; r < raidPtr->numSpare; r++) {
2657 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2658 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2659 rf_close_component(raidPtr, vp, acd);
2660 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2661 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2662 }
2663 }
2664
2665
/*
 * Kernel thread body for a reconstruction triggered by a component
 * failure: fails the component (optionally reconstructing to a spare,
 * per RF_FDFLAGS_RECON), frees the request, and exits.
 * recon_in_progress brackets the work so waiters can tell when the
 * reconstruction is done.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
	    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our creator; we own it now */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2687
/*
 * Kernel thread body that rewrites all parity for the set.  On success
 * the in-core parity_good flag is set to clean (the on-disk clean bits
 * follow at shutdown); on failure the error is logged.  Anyone blocked
 * in shutdown waiting for us is woken via parity_rewrite_cv.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2720
2721
/*
 * Kernel thread body that copies reconstructed data from the spare
 * back onto a replaced component; copyback_in_progress brackets the
 * work for observers.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2736
2737
/*
 * Kernel thread body that reconstructs a component "in place" (back
 * onto the same disk, e.g. after a transient failure), then frees the
 * request and exits.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was allocated by our creator; we own it now */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2755
/*
 * Try to read a component label from the device behind vp.  If the
 * label is present and sane, prepend a new RF_AutoConfig_t for it to
 * ac_list and return the new list head (the vnode is kept open and
 * owned by the entry).  Otherwise the vnode is closed and the list is
 * returned unchanged.  On allocation failure the entire list is torn
 * down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free everything accumulated so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no entry was created, so release label and vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2813
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels, returning a list of RF_AutoConfig_t entries (one per
 * component found).  Wedges are scanned on the first pass and plain
 * disks on the second, so a wedge covering a whole disk wins over that
 * disk's raw partition.  For non-wedge disks, FS_RAID partitions from
 * the disklabel are checked; if none are found, the raw partition is
 * checked as a last resort.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			/* FSILENT: don't complain about devices we can't open */
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* a wedge is a single component candidate:
				   check its partition type and hand it off */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3017
3018
/*
 * Sanity-check a component label read from disk.  Returns 1 if the
 * label's version, clean flag, geometry, block size and block count all
 * look plausible (fixing up old-label garbage in the *Hi fields along
 * the way when numsecs is known); returns 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3050
3051
3052 /*
3053 * For reasons yet unknown, some old component labels have garbage in
3054 * the newer numBlocksHi region, and this causes lossage. Since those
3055 * disks will also have numsecs set to less than 32 bits of sectors,
3056 * we can determine when this corruption has occurred, and fix it.
3057 *
3058 * The exact same problem, with the same unknown reason, happens to
3059 * the partitionSizeHi member as well.
3060 */
3061 static void
3062 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3063 {
3064
3065 if (numsecs < ((uint64_t)1 << 32)) {
3066 if (clabel->numBlocksHi) {
3067 printf("WARNING: total sectors < 32 bits, yet "
3068 "numBlocksHi set\n"
3069 "WARNING: resetting numBlocksHi to zero.\n");
3070 clabel->numBlocksHi = 0;
3071 }
3072
3073 if (clabel->partitionSizeHi) {
3074 printf("WARNING: total sectors < 32 bits, yet "
3075 "partitionSizeHi set\n"
3076 "WARNING: resetting partitionSizeHi to zero.\n");
3077 clabel->partitionSizeHi = 0;
3078 }
3079 }
3080 }
3081
3082
3083 #ifdef DEBUG
/*
 * Debug helper: pretty-print the interesting fields of a component
 * label to the console.  Only compiled when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* root_partition is masked to 2 bits below, hence 4 entries */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3115 #endif
3116
3117 RF_ConfigSet_t *
3118 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3119 {
3120 RF_AutoConfig_t *ac;
3121 RF_ConfigSet_t *config_sets;
3122 RF_ConfigSet_t *cset;
3123 RF_AutoConfig_t *ac_next;
3124
3125
3126 config_sets = NULL;
3127
3128 /* Go through the AutoConfig list, and figure out which components
3129 belong to what sets. */
3130 ac = ac_list;
3131 while(ac!=NULL) {
3132 /* we're going to putz with ac->next, so save it here
3133 for use at the end of the loop */
3134 ac_next = ac->next;
3135
3136 if (config_sets == NULL) {
3137 /* will need at least this one... */
3138 config_sets = (RF_ConfigSet_t *)
3139 malloc(sizeof(RF_ConfigSet_t),
3140 M_RAIDFRAME, M_NOWAIT);
3141 if (config_sets == NULL) {
3142 panic("rf_create_auto_sets: No memory!");
3143 }
3144 /* this one is easy :) */
3145 config_sets->ac = ac;
3146 config_sets->next = NULL;
3147 config_sets->rootable = 0;
3148 ac->next = NULL;
3149 } else {
3150 /* which set does this component fit into? */
3151 cset = config_sets;
3152 while(cset!=NULL) {
3153 if (rf_does_it_fit(cset, ac)) {
3154 /* looks like it matches... */
3155 ac->next = cset->ac;
3156 cset->ac = ac;
3157 break;
3158 }
3159 cset = cset->next;
3160 }
3161 if (cset==NULL) {
3162 /* didn't find a match above... new set..*/
3163 cset = (RF_ConfigSet_t *)
3164 malloc(sizeof(RF_ConfigSet_t),
3165 M_RAIDFRAME, M_NOWAIT);
3166 if (cset == NULL) {
3167 panic("rf_create_auto_sets: No memory!");
3168 }
3169 cset->ac = ac;
3170 ac->next = NULL;
3171 cset->next = config_sets;
3172 cset->rootable = 0;
3173 config_sets = cset;
3174 }
3175 }
3176 ac = ac_next;
3177 }
3178
3179
3180 return(config_sets);
3181 }
3182
3183 static int
3184 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3185 {
3186 RF_ComponentLabel_t *clabel1, *clabel2;
3187
3188 /* If this one matches the *first* one in the set, that's good
3189 enough, since the other members of the set would have been
3190 through here too... */
3191 /* note that we are not checking partitionSize here..
3192
3193 Note that we are also not checking the mod_counters here.
3194 If everything else matches except the mod_counter, that's
3195 good enough for this test. We will deal with the mod_counters
3196 a little later in the autoconfiguration process.
3197
3198 (clabel1->mod_counter == clabel2->mod_counter) &&
3199
3200 The reason we don't check for this is that failed disks
3201 will have lower modification counts. If those disks are
3202 not added to the set they used to belong to, then they will
3203 form their own set, which may result in 2 different sets,
3204 for example, competing to be configured at raid0, and
3205 perhaps competing to be the root filesystem set. If the
3206 wrong ones get configured, or both attempt to become /,
3207 weird behaviour and or serious lossage will occur. Thus we
3208 need to bring them into the fold here, and kick them out at
3209 a later point.
3210
3211 */
3212
3213 clabel1 = cset->ac->clabel;
3214 clabel2 = ac->clabel;
3215 if ((clabel1->version == clabel2->version) &&
3216 (clabel1->serial_number == clabel2->serial_number) &&
3217 (clabel1->num_rows == clabel2->num_rows) &&
3218 (clabel1->num_columns == clabel2->num_columns) &&
3219 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3220 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3221 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3222 (clabel1->parityConfig == clabel2->parityConfig) &&
3223 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3224 (clabel1->blockSize == clabel2->blockSize) &&
3225 rf_component_label_numblocks(clabel1) ==
3226 rf_component_label_numblocks(clabel2) &&
3227 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3228 (clabel1->root_partition == clabel2->root_partition) &&
3229 (clabel1->last_unit == clabel2->last_unit) &&
3230 (clabel1->config_order == clabel2->config_order)) {
3231 /* if it get's here, it almost *has* to be a match */
3232 } else {
3233 /* it's not consistent with somebody in the set..
3234 punt */
3235 return(0);
3236 }
3237 /* all was fine.. it must fit... */
3238 return(1);
3239 }
3240
/*
 * Decide whether config set `cset' has enough live components (i.e.
 * components whose mod_counter matches the set's newest) to be
 * configured.  Returns 1 if so, 0 if too many components are missing.
 * RAID 1 is special-cased: a set survives unless both members of an
 * even/odd component pair are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (the highest mod_counter seen wins; lower ones are stale) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (non-stale) component in column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3343
3344 void
3345 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3346 RF_Raid_t *raidPtr)
3347 {
3348 RF_ComponentLabel_t *clabel;
3349 int i;
3350
3351 clabel = ac->clabel;
3352
3353 /* 1. Fill in the common stuff */
3354 config->numCol = clabel->num_columns;
3355 config->numSpare = 0; /* XXX should this be set here? */
3356 config->sectPerSU = clabel->sectPerSU;
3357 config->SUsPerPU = clabel->SUsPerPU;
3358 config->SUsPerRU = clabel->SUsPerRU;
3359 config->parityConfig = clabel->parityConfig;
3360 /* XXX... */
3361 strcpy(config->diskQueueType,"fifo");
3362 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3363 config->layoutSpecificSize = 0; /* XXX ?? */
3364
3365 while(ac!=NULL) {
3366 /* row/col values will be in range due to the checks
3367 in reasonable_label() */
3368 strcpy(config->devnames[0][ac->clabel->column],
3369 ac->devname);
3370 ac = ac->next;
3371 }
3372
3373 for(i=0;i<RF_MAXDBGV;i++) {
3374 config->debugVars[i][0] = 0;
3375 }
3376 }
3377
3378 int
3379 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3380 {
3381 RF_ComponentLabel_t *clabel;
3382 int column;
3383 int sparecol;
3384
3385 raidPtr->autoconfigure = new_value;
3386
3387 for(column=0; column<raidPtr->numCol; column++) {
3388 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3389 clabel = raidget_component_label(raidPtr, column);
3390 clabel->autoconfigure = new_value;
3391 raidflush_component_label(raidPtr, column);
3392 }
3393 }
3394 for(column = 0; column < raidPtr->numSpare ; column++) {
3395 sparecol = raidPtr->numCol + column;
3396 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3397 clabel = raidget_component_label(raidPtr, sparecol);
3398 clabel->autoconfigure = new_value;
3399 raidflush_component_label(raidPtr, sparecol);
3400 }
3401 }
3402 return(new_value);
3403 }
3404
3405 int
3406 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3407 {
3408 RF_ComponentLabel_t *clabel;
3409 int column;
3410 int sparecol;
3411
3412 raidPtr->root_partition = new_value;
3413 for(column=0; column<raidPtr->numCol; column++) {
3414 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3415 clabel = raidget_component_label(raidPtr, column);
3416 clabel->root_partition = new_value;
3417 raidflush_component_label(raidPtr, column);
3418 }
3419 }
3420 for(column = 0; column < raidPtr->numSpare ; column++) {
3421 sparecol = raidPtr->numCol + column;
3422 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3423 clabel = raidget_component_label(raidPtr, sparecol);
3424 clabel->root_partition = new_value;
3425 raidflush_component_label(raidPtr, sparecol);
3426 }
3427 }
3428 return(new_value);
3429 }
3430
3431 void
3432 rf_release_all_vps(RF_ConfigSet_t *cset)
3433 {
3434 RF_AutoConfig_t *ac;
3435
3436 ac = cset->ac;
3437 while(ac!=NULL) {
3438 /* Close the vp, and give it back */
3439 if (ac->vp) {
3440 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3441 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3442 vput(ac->vp);
3443 ac->vp = NULL;
3444 }
3445 ac = ac->next;
3446 }
3447 }
3448
3449
3450 void
3451 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3452 {
3453 RF_AutoConfig_t *ac;
3454 RF_AutoConfig_t *next_ac;
3455
3456 ac = cset->ac;
3457 while(ac!=NULL) {
3458 next_ac = ac->next;
3459 /* nuke the label */
3460 free(ac->clabel, M_RAIDFRAME);
3461 /* cleanup the config structure */
3462 free(ac, M_RAIDFRAME);
3463 /* "next.." */
3464 ac = next_ac;
3465 }
3466 /* and, finally, nuke the config set */
3467 free(cset, M_RAIDFRAME);
3468 }
3469
3470
/*
 * Initialize 'clabel' with the set-wide fields taken from the current
 * configuration of 'raidPtr'.  Per-component fields (e.g. the column
 * number) are not set here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* only single-row configurations are produced here */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3503
/*
 * Auto-configure the RAID set described by 'cset'.  Returns the
 * softc of the configured unit on success, or NULL if memory could
 * not be allocated or rf_Configure() failed.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the component label and walk
	   upward until we find a unit that is free (valid == 0) or
	   does not exist yet (sc == NULL). */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing free unit found; create a new one at raidID. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the softc we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3587
/*
 * Initialize pool 'p' for objects of 'size' bytes at IPL_BIO,
 * pre-allocate ('prime') 'xmin' items, and set the low/high water
 * marks to xmin/xmax.  Panics if the pool cannot be primed.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	int error;

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	if ((error = pool_prime(p, xmin)) != 0)
		panic("%s: failed to prime pool: %d", __func__, error);
	pool_setlowat(p, xmin);
}
3600
3601 /*
3602 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3603 * to see if there is IO pending and if that IO could possibly be done
3604 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3605 * otherwise.
3606 *
3607 */
3608 int
3609 rf_buf_queue_check(RF_Raid_t *raidPtr)
3610 {
3611 struct raid_softc *rs;
3612 struct dk_softc *dksc;
3613
3614 rs = raidPtr->softc;
3615 dksc = &rs->sc_dksc;
3616
3617 if ((rs->sc_flags & RAIDF_INITED) == 0)
3618 return 1;
3619
3620 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3621 /* there is work to do */
3622 return 0;
3623 }
3624 /* default is nothing to do */
3625 return 1;
3626 }
3627
3628 int
3629 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3630 {
3631 uint64_t numsecs;
3632 unsigned secsize;
3633 int error;
3634
3635 error = getdisksize(vp, &numsecs, &secsize);
3636 if (error == 0) {
3637 diskPtr->blockSize = secsize;
3638 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3639 diskPtr->partitionSize = numsecs;
3640 return 0;
3641 }
3642 return error;
3643 }
3644
/*
 * Autoconf match function: raid pseudo-device instances always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3650
/*
 * Autoconf attach function.  Deliberately empty: no per-device setup
 * is performed at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3655
3656
3657 static int
3658 raid_detach(device_t self, int flags)
3659 {
3660 int error;
3661 struct raid_softc *rs = raidsoftc(self);
3662
3663 if (rs == NULL)
3664 return ENXIO;
3665
3666 if ((error = raidlock(rs)) != 0)
3667 return (error);
3668
3669 error = raid_detach_unlocked(rs);
3670
3671 raidunlock(rs);
3672
3673 /* XXX raid can be referenced here */
3674
3675 if (error)
3676 return error;
3677
3678 /* Free the softc */
3679 raidput(rs);
3680
3681 return 0;
3682 }
3683
/*
 * Publish the (synthetic) disk geometry of this RAID set to disk(9).
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	/* Zero first so every field not set below defaults to 0. */
	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* Fabricated track count: there is no physical geometry here. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3699
3700 /*
3701 * Get cache info for all the components (including spares).
3702 * Returns intersection of all the cache flags of all disks, or first
3703 * error if any encountered.
3704 * XXXfua feature flags can change as spares are added - lock down somehow
3705 */
3706 static int
3707 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3708 {
3709 int c;
3710 int error;
3711 int dkwhole = 0, dkpart;
3712
3713 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3714 /*
3715 * Check any non-dead disk, even when currently being
3716 * reconstructed.
3717 */
3718 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3719 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3720 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3721 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3722 if (error) {
3723 if (error != ENODEV) {
3724 printf("raid%d: get cache for component %s failed\n",
3725 raidPtr->raidid,
3726 raidPtr->Disks[c].devname);
3727 }
3728
3729 return error;
3730 }
3731
3732 if (c == 0)
3733 dkwhole = dkpart;
3734 else
3735 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3736 }
3737 }
3738
3739 *data = dkwhole;
3740
3741 return 0;
3742 }
3743
3744 /*
3745 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3746 * We end up returning whatever error was returned by the first cache flush
3747 * that fails.
3748 */
3749
3750 int
3751 rf_sync_component_caches(RF_Raid_t *raidPtr)
3752 {
3753 int c, sparecol;
3754 int e,error;
3755 int force = 1;
3756
3757 error = 0;
3758 for (c = 0; c < raidPtr->numCol; c++) {
3759 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3760 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3761 &force, FWRITE, NOCRED);
3762 if (e) {
3763 if (e != ENODEV)
3764 printf("raid%d: cache flush to component %s failed.\n",
3765 raidPtr->raidid, raidPtr->Disks[c].devname);
3766 if (error == 0) {
3767 error = e;
3768 }
3769 }
3770 }
3771 }
3772
3773 for( c = 0; c < raidPtr->numSpare ; c++) {
3774 sparecol = raidPtr->numCol + c;
3775 /* Need to ensure that the reconstruct actually completed! */
3776 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3777 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3778 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3779 if (e) {
3780 if (e != ENODEV)
3781 printf("raid%d: cache flush to component %s failed.\n",
3782 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3783 if (error == 0) {
3784 error = e;
3785 }
3786 }
3787 }
3788 }
3789 return error;
3790 }
3791
3792 /* Fill in info with the current status */
3793 void
3794 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3795 {
3796
3797 if (raidPtr->status != rf_rs_reconstructing) {
3798 info->total = 100;
3799 info->completed = 100;
3800 } else {
3801 info->total = raidPtr->reconControl->numRUsTotal;
3802 info->completed = raidPtr->reconControl->numRUsComplete;
3803 }
3804 info->remaining = info->total - info->completed;
3805 }
3806
3807 /* Fill in info with the current status */
3808 void
3809 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3810 {
3811
3812 if (raidPtr->parity_rewrite_in_progress == 1) {
3813 info->total = raidPtr->Layout.numStripe;
3814 info->completed = raidPtr->parity_rewrite_stripes_done;
3815 } else {
3816 info->completed = 100;
3817 info->total = 100;
3818 }
3819 info->remaining = info->total - info->completed;
3820 }
3821
3822 /* Fill in info with the current status */
3823 void
3824 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3825 {
3826
3827 if (raidPtr->copyback_in_progress == 1) {
3828 info->total = raidPtr->Layout.numStripe;
3829 info->completed = raidPtr->copyback_stripes_done;
3830 info->remaining = info->total - info->completed;
3831 } else {
3832 info->remaining = 0;
3833 info->completed = 100;
3834 info->total = 100;
3835 }
3836 }
3837
3838 /* Fill in config with the current info */
3839 int
3840 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3841 {
3842 int d, i, j;
3843
3844 if (!raidPtr->valid)
3845 return (ENODEV);
3846 config->cols = raidPtr->numCol;
3847 config->ndevs = raidPtr->numCol;
3848 if (config->ndevs >= RF_MAX_DISKS)
3849 return (ENOMEM);
3850 config->nspares = raidPtr->numSpare;
3851 if (config->nspares >= RF_MAX_DISKS)
3852 return (ENOMEM);
3853 config->maxqdepth = raidPtr->maxQueueDepth;
3854 d = 0;
3855 for (j = 0; j < config->cols; j++) {
3856 config->devs[d] = raidPtr->Disks[j];
3857 d++;
3858 }
3859 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3860 config->spares[i] = raidPtr->Disks[j];
3861 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3862 /* XXX: raidctl(8) expects to see this as a used spare */
3863 config->spares[i].status = rf_ds_used_spare;
3864 }
3865 }
3866 return 0;
3867 }
3868
/*
 * Copy the in-core component label for the column named in 'data'
 * (an RF_ComponentLabel_t with its 'column' field filled in by the
 * caller) back out through 'data'.  Returns EINVAL if the column is
 * out of range.
 */
int
rf_get_component_label(RF_Raid_t *raidPtr, void *data)
{
	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
	RF_ComponentLabel_t *raid_clabel;
	int column = clabel->column;

	/* Valid columns include spares: 0..numCol+numSpare-1. */
	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
		return EINVAL;
	raid_clabel = raidget_component_label(raidPtr, column);
	memcpy(clabel, raid_clabel, sizeof *clabel);

	return 0;
}
3883
/*
 * Module interface
 */

/* The module depends on the dk(4) helper code and the FCFS bufq strategy. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* When built as a loadable module we must supply the cfdriver ourselves. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3897
3898 static int
3899 raid_modcmd(modcmd_t cmd, void *data)
3900 {
3901 int error;
3902
3903 error = 0;
3904 switch (cmd) {
3905 case MODULE_CMD_INIT:
3906 error = raid_modcmd_init();
3907 break;
3908 case MODULE_CMD_FINI:
3909 error = raid_modcmd_fini();
3910 break;
3911 default:
3912 error = ENOTTY;
3913 break;
3914 }
3915 return error;
3916 }
3917
/*
 * Module initialization: attach the block/character devsw and the
 * autoconf glue, boot RAIDframe, and register a finalizer to run the
 * RAID autoconfiguration once device discovery is complete.  Each
 * failure path detaches whatever was already attached.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* raid_lock is held across the whole attach sequence below. */
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach() to allocate the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be present. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw attach */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back everything attached above */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/*
	 * NOTE(review): 'error' may still be EEXIST here, in which case
	 * rf_BootRaidframe() is skipped — presumably because a built-in
	 * instance already booted RAIDframe; confirm this is intended.
	 */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* not fatal: autoconfiguration simply won't run */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3988
/*
 * Module finalization: refuse to unload while any raid unit exists,
 * then detach the autoconf glue and the devsw and shut RAIDframe
 * down.  Each failure path re-attaches whatever was already detached
 * so the module is left in a consistent state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: restore the cfattach detached above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back both earlier detaches */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4038