/*	$NetBSD: rf_netbsdkintf.c,v 1.400 2021/08/28 16:00:52 oster Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.400 2021/08/28 16:00:52 oster Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

#define	raidunit(x)	DISKUNIT(x)
#define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;
	RF_ReconReqFlags_t flags;
	void *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static int rf_rescan(void);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;	/* AllocList */

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

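/*
 * Allocate and initialize the software state for a new RAID unit;
 * raiddestroy() below is its counterpart.
 */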
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

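/*
 * Look up the softc for the given unit, holding raid_lock across the
 * list walk.  If no unit is found and `create' is true, allocate a
 * fresh softc and insert it at the head of the list.
 */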
static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	sc = raidcreate(unit);
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

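/* Unlink a softc from the global list and release its resources. */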
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

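/*
 * Re-scan for RAID components and autoconfigure any complete sets
 * found.  The outer loop repeats as long as the previous pass
 * configured at least one new set, so that RAID sets layered on top
 * of other RAID sets (recursive RAID) are picked up as well.
 */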
static int
rf_rescan(void)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets, *cset, *next_cset;
	struct raid_softc *sc;
	int raid_added;

	ac_list = rf_find_raid_components();
	config_sets = rf_create_auto_sets(ac_list);

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	return 0;
}


static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		DPRINTF("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d",
		    __func__, booted_device, rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

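/*
 * bdevsw d_psize entry point: report the size of the unit via the
 * dk layer, or -1 if the unit does not exist or is not configured.
 */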
static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;


}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set;
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;

}

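/* Wake up anyone waiting on iodone_cv so that queued work is noticed. */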
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

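/*
 * bdevsw d_strategy entry point.  The buffer is only queued here (via
 * dk_strategy_defer); the actual I/O is issued later through the
 * raid_diskstart() hook.
 */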
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

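/* dkdriver d_diskstart hook: hand one queued buffer to RAIDframe. */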
static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);

}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);

}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

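/*
 * Return true if `cmd' requires a configured RAID set but this unit
 * has not been initialized yet, i.e. the ioctl must be rejected.
 */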
static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
	switch (cmd) {
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_SET_ROOT:
		return (rs->sc_flags & RAIDF_INITED) == 0;
	}
	return false;
}

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
	/* allocate a buffer for the layout-specific data, and copy it in */
	if (k_cfg->layoutSpecificSize == 0)
		return 0;

	if (k_cfg->layoutSpecificSize > 10000) {
		/* sanity check */
		return EINVAL;
	}

	u_char *specific_buf;
	specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
	if (specific_buf == NULL)
		return ENOMEM;

	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
	    k_cfg->layoutSpecificSize);
	if (retcode) {
		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
		return retcode;
	}

	k_cfg->layoutSpecific = specific_buf;
	return 0;
}

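/*
 * Copy in the user's RF_Config_t for RAIDFRAME_CONFIGURE.  `data'
 * carries a pointer to the user-space structure, not the structure
 * itself.  On success *k_cfg points at a kernel copy that the caller
 * must eventually RF_Free().
 */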
static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit! */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}

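/*
 * Configure a RAID set from the copied-in configuration, attach the
 * disk, and start I/O.  On failure the unit is flagged for shutdown
 * so that it is detached when closed.
 */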
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}

#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	*/
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	*/

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d:     Col: %d   "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
	/*
	 * This makes no sense on a RAID 0, or if we are not reconstructing
	 * so tell the user it's done.
	 */
	if (raidPtr->Layout.map->faultsTolerated == 0 ||
	    raidPtr->status != rf_rs_reconstructing) {
		*data = 100;
		return 0;
	}
	if (raidPtr->reconControl->numRUsTotal == 0) {
		*data = 0;
		return 0;
	}
	*data = (raidPtr->reconControl->numRUsComplete * 100
	    / raidPtr->reconControl->numRUsTotal);
	return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	if (rf_must_be_initialized(rs, cmd))
		return ENXIO;

	switch (cmd) {
	/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

	/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr,"raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		ucfgp = *(RF_DeviceConfig_t **)data;
		d_cfg = RF_Malloc(sizeof(*d_cfg));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESCAN:
		return rf_rescan();

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

	/* invoke a copyback operation after recon on whatever disk
	 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

	/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

	/* the sparetable daemon calls this to wait for the kernel to
	 * need a spare table. this ioctl does not return until a
	 * spare table is needed. XXX -- calling mpsleep here in the
	 * ioctl code is almost certainly wrong and evil. -- XXX XXX
	 * -- I should either compute the spare table in the kernel,
	 * or have a different -- XXX XXX -- interface (a different
	 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

	/* used by the spare table daemon to deliver a spare table
	 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor. the return status of the spare
		 * table installation is passed in the "fcol" field */
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return retcode;
#endif
	default:
		/*
		 * Don't bother trying to load compat modules
		 * if it is not our ioctl. This is more efficient
		 * and makes rump tests not depend on compat code
		 */
		if (IOCGROUP(cmd) != 'r')
			break;
#ifdef _LP64
		if ((l->l_proc->p_flag & PK_32) != 0) {
			module_autoload("compat_netbsd32_raid",
			    MODULE_CLASS_EXEC);
			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
			    (rs, cmd, data), enosys(), retcode);
			if (retcode != EPASSTHROUGH)
				return retcode;
		}
#endif
		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;

		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return EINVAL;

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return retcode;

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
#endif
1933
1934 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1935 * bp & passes it down.
1936  * any calls originating in the kernel must use non-blocking I/O;
1937  * we do some extra sanity checking to return "appropriate" error values for
1938 * certain conditions (to make some standard utilities work)
1939 *
1940 * Formerly known as: rf_DoAccessKernel
1941 */
1942 void
1943 raidstart(RF_Raid_t *raidPtr)
1944 {
1945 struct raid_softc *rs;
1946 struct dk_softc *dksc;
1947
1948 rs = raidPtr->softc;
1949 dksc = &rs->sc_dksc;
1950 /* quick check to see if anything has died recently */
1951 rf_lock_mutex2(raidPtr->mutex);
1952 if (raidPtr->numNewFailures > 0) {
1953 rf_unlock_mutex2(raidPtr->mutex);
1954 rf_update_component_labels(raidPtr,
1955 RF_NORMAL_COMPONENT_UPDATE);
1956 rf_lock_mutex2(raidPtr->mutex);
1957 raidPtr->numNewFailures--;
1958 }
1959 rf_unlock_mutex2(raidPtr->mutex);
1960
1961 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1962 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1963 return;
1964 }
1965
1966 dk_start(dksc, NULL);
1967 }
1968
1969 static int
1970 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1971 {
1972 RF_SectorCount_t num_blocks, pb, sum;
1973 RF_RaidAddr_t raid_addr;
1974 daddr_t blocknum;
1975 int rc;
1976
1977 rf_lock_mutex2(raidPtr->mutex);
1978 if (raidPtr->openings == 0) {
1979 rf_unlock_mutex2(raidPtr->mutex);
1980 return EAGAIN;
1981 }
1982 rf_unlock_mutex2(raidPtr->mutex);
1983
1984 blocknum = bp->b_rawblkno;
1985
1986 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1987 (int) blocknum));
1988
1989 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1990 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1991
1992 /* *THIS* is where we adjust what block we're going to...
1993 * but DO NOT TOUCH bp->b_blkno!!! */
1994 raid_addr = blocknum;
1995
1996 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1997 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1998 sum = raid_addr + num_blocks + pb;
1999 	if (rf_debugKernelAccess) {
2000 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2001 (int) raid_addr, (int) sum, (int) num_blocks,
2002 (int) pb, (int) bp->b_resid));
2003 }
2004 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2005 || (sum < num_blocks) || (sum < pb)) {
2006 rc = ENOSPC;
2007 goto done;
2008 }
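	/*
	 * Worked example of the check above (illustrative values, not
	 * from any real configuration): with 512-byte sectors,
	 * logBytesPerSector == 9 and sectorMask == 0x1ff, so a 4 KiB
	 * request at raid_addr 100 gives num_blocks == 8 and pb == 0,
	 * hence sum == 108, which must not exceed totalSectors.  The
	 * (sum < raid_addr) style comparisons catch unsigned wraparound.
	 */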
2009 /*
2010 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2011 */
2012
2013 if (bp->b_bcount & raidPtr->sectorMask) {
2014 rc = ENOSPC;
2015 goto done;
2016 }
2017 db1_printf(("Calling DoAccess..\n"));
2018
2019
2020 rf_lock_mutex2(raidPtr->mutex);
2021 raidPtr->openings--;
2022 rf_unlock_mutex2(raidPtr->mutex);
2023
2024 /* don't ever condition on bp->b_flags & B_WRITE.
2025 * always condition on B_READ instead */
2026
2027 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2028 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2029 raid_addr, num_blocks,
2030 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2031
2032 done:
2033 return rc;
2034 }
2035
2036 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2037
2038 int
2039 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2040 {
2041 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2042 struct buf *bp;
2043
2044 req->queue = queue;
2045 bp = req->bp;
2046
2047 switch (req->type) {
2048 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2049 /* XXX need to do something extra here.. */
2050 /* I'm leaving this in, as I've never actually seen it used,
2051 * and I'd like folks to report it... GO */
2052 printf("%s: WAKEUP CALLED\n", __func__);
2053 queue->numOutstanding++;
2054
2055 bp->b_flags = 0;
2056 bp->b_private = req;
2057
2058 KernelWakeupFunc(bp);
2059 break;
2060
2061 case RF_IO_TYPE_READ:
2062 case RF_IO_TYPE_WRITE:
2063 #if RF_ACC_TRACE > 0
2064 if (req->tracerec) {
2065 RF_ETIMER_START(req->tracerec->timer);
2066 }
2067 #endif
2068 InitBP(bp, queue->rf_cinfo->ci_vp,
2069 op, queue->rf_cinfo->ci_dev,
2070 req->sectorOffset, req->numSector,
2071 req->buf, KernelWakeupFunc, (void *) req,
2072 queue->raidPtr->logBytesPerSector);
2073
2074 if (rf_debugKernelAccess) {
2075 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2076 (long) bp->b_blkno));
2077 }
2078 queue->numOutstanding++;
2079 queue->last_deq_sector = req->sectorOffset;
2080 /* acc wouldn't have been let in if there were any pending
2081 * reqs at any other priority */
2082 queue->curPriority = req->priority;
2083
2084 db1_printf(("Going for %c to unit %d col %d\n",
2085 req->type, queue->raidPtr->raidid,
2086 queue->col));
2087 db1_printf(("sector %d count %d (%d bytes) %d\n",
2088 (int) req->sectorOffset, (int) req->numSector,
2089 (int) (req->numSector <<
2090 queue->raidPtr->logBytesPerSector),
2091 (int) queue->raidPtr->logBytesPerSector));
2092
2093 /*
2094 * XXX: drop lock here since this can block at
2095 * least with backing SCSI devices. Retake it
2096 * to minimize fuss with calling interfaces.
2097 */
2098
2099 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2100 bdev_strategy(bp);
2101 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2102 break;
2103
2104 default:
2105 panic("bad req->type in rf_DispatchKernelIO");
2106 }
2107 db1_printf(("Exiting from DispatchKernelIO\n"));
2108
2109 return 0;
2110 }
2111 /* this is the callback function associated with an I/O invoked from
2112    kernel code.
2113  */
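/*
 * Rough flow, as implemented below: biodone() on the component buf
 * invokes this callback (installed via InitBP()), which records any
 * error (possibly marking the component as failed), appends the
 * request to raidPtr->iodone, and signals iodone_cv so the raidio
 * thread can complete the request outside of this context.
 */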
2114 static void
2115 KernelWakeupFunc(struct buf *bp)
2116 {
2117 RF_DiskQueueData_t *req = NULL;
2118 RF_DiskQueue_t *queue;
2119
2120 db1_printf(("recovering the request queue:\n"));
2121
2122 req = bp->b_private;
2123
2124 queue = (RF_DiskQueue_t *) req->queue;
2125
2126 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2127
2128 #if RF_ACC_TRACE > 0
2129 if (req->tracerec) {
2130 RF_ETIMER_STOP(req->tracerec->timer);
2131 RF_ETIMER_EVAL(req->tracerec->timer);
2132 rf_lock_mutex2(rf_tracing_mutex);
2133 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2134 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2135 req->tracerec->num_phys_ios++;
2136 rf_unlock_mutex2(rf_tracing_mutex);
2137 }
2138 #endif
2139
2140 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2141 * ballistic, and mark the component as hosed... */
2142
2143 if (bp->b_error != 0) {
2144 /* Mark the disk as dead */
2145 /* but only mark it once... */
2146 /* and only if it wouldn't leave this RAID set
2147 completely broken */
2148 if (((queue->raidPtr->Disks[queue->col].status ==
2149 rf_ds_optimal) ||
2150 (queue->raidPtr->Disks[queue->col].status ==
2151 rf_ds_used_spare)) &&
2152 (queue->raidPtr->numFailures <
2153 queue->raidPtr->Layout.map->faultsTolerated)) {
2154 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2155 queue->raidPtr->raidid,
2156 bp->b_error,
2157 queue->raidPtr->Disks[queue->col].devname);
2158 queue->raidPtr->Disks[queue->col].status =
2159 rf_ds_failed;
2160 queue->raidPtr->status = rf_rs_degraded;
2161 queue->raidPtr->numFailures++;
2162 queue->raidPtr->numNewFailures++;
2163 } else { /* Disk is already dead... */
2164 /* printf("Disk already marked as dead!\n"); */
2165 }
2166
2167 }
2168
2169 /* Fill in the error value */
2170 req->error = bp->b_error;
2171
2172 /* Drop this one on the "finished" queue... */
2173 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2174
2175 /* Let the raidio thread know there is work to be done. */
2176 rf_signal_cond2(queue->raidPtr->iodone_cv);
2177
2178 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2179 }
2180
2181
2182 /*
2183 * initialize a buf structure for doing an I/O in the kernel.
2184 */
2185 static void
2186 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2187 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2188 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2189 {
2190 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2191 bp->b_oflags = 0;
2192 bp->b_cflags = 0;
2193 bp->b_bcount = numSect << logBytesPerSector;
2194 bp->b_bufsize = bp->b_bcount;
2195 bp->b_error = 0;
2196 bp->b_dev = dev;
2197 bp->b_data = bf;
2198 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2199 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2200 if (bp->b_bcount == 0) {
2201 panic("bp->b_bcount is zero in InitBP!!");
2202 }
2203 bp->b_iodone = cbFunc;
2204 bp->b_private = cbArg;
2205 }
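/*
 * Unit-conversion example for the b_blkno computation above
 * (illustrative numbers only): b_blkno is expressed in DEV_BSIZE
 * (512-byte) units, so with 4096-byte sectors logBytesPerSector is
 * 12 and startSect 10 becomes (10 << 12) >> 9 == 80.
 */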
2206
2207 /*
2208 * Wait interruptibly for an exclusive lock.
2209 *
2210 * XXX
2211 * Several drivers do this; it should be abstracted and made MP-safe.
2212 * (Hmm... where have we seen this warning before :-> GO )
2213 */
2214 static int
2215 raidlock(struct raid_softc *rs)
2216 {
2217 int error;
2218
2219 error = 0;
2220 mutex_enter(&rs->sc_mutex);
2221 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2222 rs->sc_flags |= RAIDF_WANTED;
2223 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2224 if (error != 0)
2225 goto done;
2226 }
2227 rs->sc_flags |= RAIDF_LOCKED;
2228 done:
2229 mutex_exit(&rs->sc_mutex);
2230 return error;
2231 }
2232 /*
2233 * Unlock and wake up any waiters.
2234 */
2235 static void
2236 raidunlock(struct raid_softc *rs)
2237 {
2238
2239 mutex_enter(&rs->sc_mutex);
2240 rs->sc_flags &= ~RAIDF_LOCKED;
2241 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2242 rs->sc_flags &= ~RAIDF_WANTED;
2243 cv_broadcast(&rs->sc_cv);
2244 }
2245 mutex_exit(&rs->sc_mutex);
2246 }
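/*
 * Typical usage of the lock pair above (see raid_detach() later in
 * this file for a real caller):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return error;
 *	... fiddle with the softc ...
 *	raidunlock(rs);
 */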
2247
2248
2249 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2250 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2251 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2252
2253 static daddr_t
2254 rf_component_info_offset(void)
2255 {
2256
2257 return RF_COMPONENT_INFO_OFFSET;
2258 }
2259
2260 static daddr_t
2261 rf_component_info_size(unsigned secsize)
2262 {
2263 daddr_t info_size;
2264
2265 KASSERT(secsize);
2266 if (secsize > RF_COMPONENT_INFO_SIZE)
2267 info_size = secsize;
2268 else
2269 info_size = RF_COMPONENT_INFO_SIZE;
2270
2271 return info_size;
2272 }
2273
2274 static daddr_t
2275 rf_parity_map_offset(RF_Raid_t *raidPtr)
2276 {
2277 daddr_t map_offset;
2278
2279 KASSERT(raidPtr->bytesPerSector);
2280 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2281 map_offset = raidPtr->bytesPerSector;
2282 else
2283 map_offset = RF_COMPONENT_INFO_SIZE;
2284 map_offset += rf_component_info_offset();
2285
2286 return map_offset;
2287 }
2288
2289 static daddr_t
2290 rf_parity_map_size(RF_Raid_t *raidPtr)
2291 {
2292 daddr_t map_size;
2293
2294 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2295 map_size = raidPtr->bytesPerSector;
2296 else
2297 map_size = RF_PARITY_MAP_SIZE;
2298
2299 return map_size;
2300 }
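/*
 * The resulting on-disk layout at the front of each component,
 * assuming (for illustration) a 512-byte sector size:
 *
 *	bytes     0 .. 16383	reserved (protected) area
 *	bytes 16384 .. 17407	component label (RF_COMPONENT_INFO_SIZE)
 *	bytes 17408 .. 		parity map (RF_PARITYMAP_NBYTE)
 *
 * With sectors larger than 1024 bytes both areas grow to a full
 * sector each, per the max() logic above.
 */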
2301
2302 int
2303 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2304 {
2305 RF_ComponentLabel_t *clabel;
2306
2307 clabel = raidget_component_label(raidPtr, col);
2308 clabel->clean = RF_RAID_CLEAN;
2309 raidflush_component_label(raidPtr, col);
2310 return(0);
2311 }
2312
2313
2314 int
2315 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2316 {
2317 RF_ComponentLabel_t *clabel;
2318
2319 clabel = raidget_component_label(raidPtr, col);
2320 clabel->clean = RF_RAID_DIRTY;
2321 raidflush_component_label(raidPtr, col);
2322 return(0);
2323 }
2324
2325 int
2326 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2327 {
2328 KASSERT(raidPtr->bytesPerSector);
2329
2330 return raidread_component_label(raidPtr->bytesPerSector,
2331 raidPtr->Disks[col].dev,
2332 raidPtr->raid_cinfo[col].ci_vp,
2333 &raidPtr->raid_cinfo[col].ci_label);
2334 }
2335
2336 RF_ComponentLabel_t *
2337 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2338 {
2339 return &raidPtr->raid_cinfo[col].ci_label;
2340 }
2341
2342 int
2343 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2344 {
2345 RF_ComponentLabel_t *label;
2346
2347 label = &raidPtr->raid_cinfo[col].ci_label;
2348 label->mod_counter = raidPtr->mod_counter;
2349 #ifndef RF_NO_PARITY_MAP
2350 label->parity_map_modcount = label->mod_counter;
2351 #endif
2352 return raidwrite_component_label(raidPtr->bytesPerSector,
2353 raidPtr->Disks[col].dev,
2354 raidPtr->raid_cinfo[col].ci_vp, label);
2355 }
2356
2357 /*
2358 * Swap the label endianness.
2359 *
2360 * Everything in the component label is 4-byte-swapped except the version,
2361 * which is kept in the byte-swapped version at all times, and indicates
2362 * for the writer that a swap is necessary.
2363 *
2364  * For reads it is expected that out_label == clabel, but writes expect
2365  * separate labels, so that only the re-swapped label is written out to
2366  * disk, leaving the in-memory copy in its swapped-except-version form.
2367 *
2368 * Only support swapping label version 2.
2369 */
2370 static void
2371 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
2372 {
2373 int *in, *out, *in_last;
2374
2375 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
2376
2377 /* Don't swap the label, but do copy it. */
2378 out_label->version = clabel->version;
2379
2380 in = &clabel->serial_number;
2381 in_last = &clabel->future_use2[42];
2382 out = &out_label->serial_number;
2383
2384 for (; in < in_last; in++, out++)
2385 *out = bswap32(*in);
2386 }
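/*
 * The two call patterns, as used by the functions below:
 * raidread_component_label() swaps in place via
 * rf_swap_label(clabel, clabel), while raidwrite_component_label()
 * swaps into a local copy so the in-memory label is not disturbed.
 */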
2387
2388 static int
2389 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2390 RF_ComponentLabel_t *clabel)
2391 {
2392 int error;
2393
2394 error = raidread_component_area(dev, b_vp, clabel,
2395 sizeof(RF_ComponentLabel_t),
2396 rf_component_info_offset(),
2397 rf_component_info_size(secsize));
2398
2399 if (error == 0 &&
2400 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2401 rf_swap_label(clabel, clabel);
2402 }
2403
2404 return error;
2405 }
2406
2407 /* ARGSUSED */
2408 static int
2409 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2410 size_t msize, daddr_t offset, daddr_t dsize)
2411 {
2412 struct buf *bp;
2413 int error;
2414
2415 /* XXX should probably ensure that we don't try to do this if
2416 someone has changed rf_protected_sectors. */
2417
2418 if (b_vp == NULL) {
2419 /* For whatever reason, this component is not valid.
2420 Don't try to read a component label from it. */
2421 return(EINVAL);
2422 }
2423
2424 /* get a block of the appropriate size... */
2425 bp = geteblk((int)dsize);
2426 bp->b_dev = dev;
2427
2428 /* get our ducks in a row for the read */
2429 bp->b_blkno = offset / DEV_BSIZE;
2430 bp->b_bcount = dsize;
2431 bp->b_flags |= B_READ;
2432 bp->b_resid = dsize;
2433
2434 bdev_strategy(bp);
2435 error = biowait(bp);
2436
2437 if (!error) {
2438 memcpy(data, bp->b_data, msize);
2439 }
2440
2441 brelse(bp, 0);
2442 return(error);
2443 }
2444
2445 static int
2446 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2447 RF_ComponentLabel_t *clabel)
2448 {
2449 RF_ComponentLabel_t *clabel_write = clabel;
2450 RF_ComponentLabel_t lclabel;
2451 int error;
2452
2453 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2454 clabel_write = &lclabel;
2455 rf_swap_label(clabel, clabel_write);
2456 }
2457 error = raidwrite_component_area(dev, b_vp, clabel_write,
2458 sizeof(RF_ComponentLabel_t),
2459 rf_component_info_offset(),
2460 rf_component_info_size(secsize), 0);
2461
2462 return error;
2463 }
2464
2465 /* ARGSUSED */
2466 static int
2467 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2468 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2469 {
2470 struct buf *bp;
2471 int error;
2472
2473 /* get a block of the appropriate size... */
2474 bp = geteblk((int)dsize);
2475 bp->b_dev = dev;
2476
2477 /* get our ducks in a row for the write */
2478 bp->b_blkno = offset / DEV_BSIZE;
2479 bp->b_bcount = dsize;
2480 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2481 bp->b_resid = dsize;
2482
2483 memset(bp->b_data, 0, dsize);
2484 memcpy(bp->b_data, data, msize);
2485
2486 bdev_strategy(bp);
2487 if (asyncp)
2488 return 0;
2489 error = biowait(bp);
2490 brelse(bp, 0);
2491 if (error) {
2492 #if 1
2493 printf("Failed to write RAID component info!\n");
2494 #endif
2495 }
2496
2497 return(error);
2498 }
2499
2500 void
2501 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2502 {
2503 int c;
2504
2505 for (c = 0; c < raidPtr->numCol; c++) {
2506 /* Skip dead disks. */
2507 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2508 continue;
2509 /* XXXjld: what if an error occurs here? */
2510 raidwrite_component_area(raidPtr->Disks[c].dev,
2511 raidPtr->raid_cinfo[c].ci_vp, map,
2512 RF_PARITYMAP_NBYTE,
2513 rf_parity_map_offset(raidPtr),
2514 rf_parity_map_size(raidPtr), 0);
2515 }
2516 }
2517
2518 void
2519 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2520 {
2521 struct rf_paritymap_ondisk tmp;
2522 int c,first;
2523
2524 first=1;
2525 for (c = 0; c < raidPtr->numCol; c++) {
2526 /* Skip dead disks. */
2527 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2528 continue;
2529 raidread_component_area(raidPtr->Disks[c].dev,
2530 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2531 RF_PARITYMAP_NBYTE,
2532 rf_parity_map_offset(raidPtr),
2533 rf_parity_map_size(raidPtr));
2534 if (first) {
2535 memcpy(map, &tmp, sizeof(*map));
2536 first = 0;
2537 } else {
2538 rf_paritymap_merge(map, &tmp);
2539 }
2540 }
2541 }
2542
2543 void
2544 rf_markalldirty(RF_Raid_t *raidPtr)
2545 {
2546 RF_ComponentLabel_t *clabel;
2547 int sparecol;
2548 int c;
2549 int j;
2550 int scol = -1;
2551
2552 raidPtr->mod_counter++;
2553 for (c = 0; c < raidPtr->numCol; c++) {
2554 /* we don't want to touch (at all) a disk that has
2555 failed */
2556 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2557 clabel = raidget_component_label(raidPtr, c);
2558 if (clabel->status == rf_ds_spared) {
2559 /* XXX do something special...
2560 but whatever you do, don't
2561 try to access it!! */
2562 } else {
2563 raidmarkdirty(raidPtr, c);
2564 }
2565 }
2566 }
2567
2568 for( c = 0; c < raidPtr->numSpare ; c++) {
2569 sparecol = raidPtr->numCol + c;
2570 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2571 			/*
2572 			 * We claim this disk is "optimal" if it's
2573 			 * rf_ds_used_spare, as that means it should be
2574 			 * directly substitutable for the disk it replaced.
2575 			 * We note that too...
2576 			 */
2579
2580 for(j=0;j<raidPtr->numCol;j++) {
2581 if (raidPtr->Disks[j].spareCol == sparecol) {
2582 scol = j;
2583 break;
2584 }
2585 }
2586
2587 clabel = raidget_component_label(raidPtr, sparecol);
2588 /* make sure status is noted */
2589
2590 raid_init_component_label(raidPtr, clabel);
2591
2592 clabel->row = 0;
2593 clabel->column = scol;
2594 /* Note: we *don't* change status from rf_ds_used_spare
2595 to rf_ds_optimal */
2596 /* clabel.status = rf_ds_optimal; */
2597
2598 raidmarkdirty(raidPtr, sparecol);
2599 }
2600 }
2601 }
2602
2603
2604 void
2605 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2606 {
2607 RF_ComponentLabel_t *clabel;
2608 int sparecol;
2609 int c;
2610 int j;
2611 int scol;
2612 struct raid_softc *rs = raidPtr->softc;
2613
2614 scol = -1;
2615
2616 /* XXX should do extra checks to make sure things really are clean,
2617 rather than blindly setting the clean bit... */
2618
2619 raidPtr->mod_counter++;
2620
2621 for (c = 0; c < raidPtr->numCol; c++) {
2622 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2623 clabel = raidget_component_label(raidPtr, c);
2624 /* make sure status is noted */
2625 clabel->status = rf_ds_optimal;
2626
2627 /* note what unit we are configured as */
2628 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2629 clabel->last_unit = raidPtr->raidid;
2630
2631 raidflush_component_label(raidPtr, c);
2632 if (final == RF_FINAL_COMPONENT_UPDATE) {
2633 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2634 raidmarkclean(raidPtr, c);
2635 }
2636 }
2637 }
2638 /* else we don't touch it.. */
2639 }
2640
2641 for( c = 0; c < raidPtr->numSpare ; c++) {
2642 sparecol = raidPtr->numCol + c;
2643 /* Need to ensure that the reconstruct actually completed! */
2644 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2645 			/*
2646 			 * We claim this disk is "optimal" if it's
2647 			 * rf_ds_used_spare, as that means it should be
2648 			 * directly substitutable for the disk it replaced.
2649 			 * We note that too...
2650 			 */
2653
2654 for(j=0;j<raidPtr->numCol;j++) {
2655 if (raidPtr->Disks[j].spareCol == sparecol) {
2656 scol = j;
2657 break;
2658 }
2659 }
2660
2661 /* XXX shouldn't *really* need this... */
2662 clabel = raidget_component_label(raidPtr, sparecol);
2663 /* make sure status is noted */
2664
2665 raid_init_component_label(raidPtr, clabel);
2666
2667 clabel->column = scol;
2668 clabel->status = rf_ds_optimal;
2669 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2670 clabel->last_unit = raidPtr->raidid;
2671
2672 raidflush_component_label(raidPtr, sparecol);
2673 if (final == RF_FINAL_COMPONENT_UPDATE) {
2674 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2675 raidmarkclean(raidPtr, sparecol);
2676 }
2677 }
2678 }
2679 }
2680 }
2681
2682 void
2683 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2684 {
2685
2686 if (vp != NULL) {
2687 if (auto_configured == 1) {
2688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2689 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2690 vput(vp);
2691
2692 } else {
2693 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2694 }
2695 }
2696 }
2697
2698
2699 void
2700 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2701 {
2702 int r,c;
2703 struct vnode *vp;
2704 int acd;
2705
2706
2707 /* We take this opportunity to close the vnodes like we should.. */
2708
2709 for (c = 0; c < raidPtr->numCol; c++) {
2710 vp = raidPtr->raid_cinfo[c].ci_vp;
2711 acd = raidPtr->Disks[c].auto_configured;
2712 rf_close_component(raidPtr, vp, acd);
2713 raidPtr->raid_cinfo[c].ci_vp = NULL;
2714 raidPtr->Disks[c].auto_configured = 0;
2715 }
2716
2717 for (r = 0; r < raidPtr->numSpare; r++) {
2718 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2719 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2720 rf_close_component(raidPtr, vp, acd);
2721 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2722 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2723 }
2724 }
2725
2726
2727 static void
2728 rf_ReconThread(struct rf_recon_req_internal *req)
2729 {
2730 int s;
2731 RF_Raid_t *raidPtr;
2732
2733 s = splbio();
2734 raidPtr = (RF_Raid_t *) req->raidPtr;
2735 raidPtr->recon_in_progress = 1;
2736
2737 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2738 raidPtr->forceRecon = 1;
2739 }
2740
2741 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2742 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2743
2744 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2745 raidPtr->forceRecon = 0;
2746 }
2747
2748 RF_Free(req, sizeof(*req));
2749
2750 raidPtr->recon_in_progress = 0;
2751 splx(s);
2752
2753 /* That's all... */
2754 kthread_exit(0); /* does not return */
2755 }
2756
2757 static void
2758 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2759 {
2760 int retcode;
2761 int s;
2762
2763 raidPtr->parity_rewrite_stripes_done = 0;
2764 raidPtr->parity_rewrite_in_progress = 1;
2765 s = splbio();
2766 retcode = rf_RewriteParity(raidPtr);
2767 splx(s);
2768 if (retcode) {
2769 printf("raid%d: Error re-writing parity (%d)!\n",
2770 raidPtr->raidid, retcode);
2771 } else {
2772 		/* set the clean bit! If we shut down correctly,
2773 the clean bit on each component label will get
2774 set */
2775 raidPtr->parity_good = RF_RAID_CLEAN;
2776 }
2777 raidPtr->parity_rewrite_in_progress = 0;
2778
2779 /* Anyone waiting for us to stop? If so, inform them... */
2780 if (raidPtr->waitShutdown) {
2781 rf_lock_mutex2(raidPtr->rad_lock);
2782 cv_broadcast(&raidPtr->parity_rewrite_cv);
2783 rf_unlock_mutex2(raidPtr->rad_lock);
2784 }
2785
2786 /* That's all... */
2787 kthread_exit(0); /* does not return */
2788 }
2789
2790
2791 static void
2792 rf_CopybackThread(RF_Raid_t *raidPtr)
2793 {
2794 int s;
2795
2796 raidPtr->copyback_in_progress = 1;
2797 s = splbio();
2798 rf_CopybackReconstructedData(raidPtr);
2799 splx(s);
2800 raidPtr->copyback_in_progress = 0;
2801
2802 /* That's all... */
2803 kthread_exit(0); /* does not return */
2804 }
2805
2806
2807 static void
2808 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2809 {
2810 int s;
2811 RF_Raid_t *raidPtr;
2812
2813 s = splbio();
2814 raidPtr = req->raidPtr;
2815 raidPtr->recon_in_progress = 1;
2816
2817 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2818 raidPtr->forceRecon = 1;
2819 }
2820
2821 rf_ReconstructInPlace(raidPtr, req->col);
2822
2823 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2824 raidPtr->forceRecon = 0;
2825 }
2826
2827 RF_Free(req, sizeof(*req));
2828 raidPtr->recon_in_progress = 0;
2829 splx(s);
2830
2831 /* That's all... */
2832 kthread_exit(0); /* does not return */
2833 }
2834
2835 static RF_AutoConfig_t *
2836 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2837 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2838 unsigned secsize)
2839 {
2840 int good_one = 0;
2841 RF_ComponentLabel_t *clabel;
2842 RF_AutoConfig_t *ac;
2843
2844 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2845
2846 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2847 /* Got the label. Does it look reasonable? */
2848 if (rf_reasonable_label(clabel, numsecs) &&
2849 (rf_component_label_partitionsize(clabel) <= size)) {
2850 #ifdef DEBUG
2851 printf("Component on: %s: %llu\n",
2852 cname, (unsigned long long)size);
2853 rf_print_component_label(clabel);
2854 #endif
2855 /* if it's reasonable, add it, else ignore it. */
2856 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2857 M_WAITOK);
2858 strlcpy(ac->devname, cname, sizeof(ac->devname));
2859 ac->dev = dev;
2860 ac->vp = vp;
2861 ac->clabel = clabel;
2862 ac->next = ac_list;
2863 ac_list = ac;
2864 good_one = 1;
2865 }
2866 }
2867 if (!good_one) {
2868 /* cleanup */
2869 free(clabel, M_RAIDFRAME);
2870 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2871 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2872 vput(vp);
2873 }
2874 return ac_list;
2875 }
2876
2877 static RF_AutoConfig_t *
2878 rf_find_raid_components(void)
2879 {
2880 struct vnode *vp;
2881 struct disklabel label;
2882 device_t dv;
2883 deviter_t di;
2884 dev_t dev;
2885 int bmajor, bminor, wedge, rf_part_found;
2886 int error;
2887 int i;
2888 RF_AutoConfig_t *ac_list;
2889 uint64_t numsecs;
2890 unsigned secsize;
2891 int dowedges;
2892
2893 /* initialize the AutoConfig list */
2894 ac_list = NULL;
2895
2896 /*
2897 	 * we begin by trolling through *all* the devices on the system *twice*:
2898 	 * first we scan for wedges, second for other devices. This avoids
2899 * using a raw partition instead of a wedge that covers the whole disk
2900 */
2901
2902 for (dowedges=1; dowedges>=0; --dowedges) {
2903 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2904 dv = deviter_next(&di)) {
2905
2906 /* we are only interested in disks */
2907 if (device_class(dv) != DV_DISK)
2908 continue;
2909
2910 /* we don't care about floppies */
2911 if (device_is_a(dv, "fd")) {
2912 continue;
2913 }
2914
2915 /* we don't care about CDs. */
2916 if (device_is_a(dv, "cd")) {
2917 continue;
2918 }
2919
2920 /* we don't care about md. */
2921 if (device_is_a(dv, "md")) {
2922 continue;
2923 }
2924
2925 /* hdfd is the Atari/Hades floppy driver */
2926 if (device_is_a(dv, "hdfd")) {
2927 continue;
2928 }
2929
2930 /* fdisa is the Atari/Milan floppy driver */
2931 if (device_is_a(dv, "fdisa")) {
2932 continue;
2933 }
2934
2935 /* we don't care about spiflash */
2936 if (device_is_a(dv, "spiflash")) {
2937 continue;
2938 }
2939
2940 /* are we in the wedges pass ? */
2941 wedge = device_is_a(dv, "dk");
2942 if (wedge != dowedges) {
2943 continue;
2944 }
2945
2946 /* need to find the device_name_to_block_device_major stuff */
2947 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2948
2949 			rf_part_found = 0; /* No raid partition as yet */
2950
2951 /* get a vnode for the raw partition of this disk */
2952 bminor = minor(device_unit(dv));
2953 dev = wedge ? makedev(bmajor, bminor) :
2954 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2955 if (bdevvp(dev, &vp))
2956 panic("RAID can't alloc vnode");
2957
2958 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2959 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2960
2961 if (error) {
2962 /* "Who cares." Continue looking
2963 				   for something that exists */
2964 vput(vp);
2965 continue;
2966 }
2967
2968 error = getdisksize(vp, &numsecs, &secsize);
2969 if (error) {
2970 /*
2971 * Pseudo devices like vnd and cgd can be
2972 * opened but may still need some configuration.
2973 * Ignore these quietly.
2974 */
2975 if (error != ENXIO)
2976 printf("RAIDframe: can't get disk size"
2977 " for dev %s (%d)\n",
2978 device_xname(dv), error);
2979 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2980 vput(vp);
2981 continue;
2982 }
2983 if (wedge) {
2984 struct dkwedge_info dkw;
2985 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2986 NOCRED);
2987 if (error) {
2988 printf("RAIDframe: can't get wedge info for "
2989 "dev %s (%d)\n", device_xname(dv), error);
2990 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2991 vput(vp);
2992 continue;
2993 }
2994
2995 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2996 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2997 vput(vp);
2998 continue;
2999 }
3000
3001 VOP_UNLOCK(vp);
3002 ac_list = rf_get_component(ac_list, dev, vp,
3003 device_xname(dv), dkw.dkw_size, numsecs, secsize);
3004 				rf_part_found = 1; /* There is a raid component on this disk */
3005 continue;
3006 }
3007
3008 /* Ok, the disk exists. Go get the disklabel. */
3009 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3010 if (error) {
3011 /*
3012 * XXX can't happen - open() would
3013 * have errored out (or faked up one)
3014 */
3015 if (error != ENOTTY)
3016 printf("RAIDframe: can't get label for dev "
3017 "%s (%d)\n", device_xname(dv), error);
3018 }
3019
3020 /* don't need this any more. We'll allocate it again
3021 a little later if we really do... */
3022 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3023 vput(vp);
3024
3025 if (error)
3026 continue;
3027
3028 			rf_part_found = 0; /* No raid partitions yet */
3029 for (i = 0; i < label.d_npartitions; i++) {
3030 char cname[sizeof(ac_list->devname)];
3031
3032 /* We only support partitions marked as RAID */
3033 if (label.d_partitions[i].p_fstype != FS_RAID)
3034 continue;
3035
3036 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3037 if (bdevvp(dev, &vp))
3038 panic("RAID can't alloc vnode");
3039
3040 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3041 error = VOP_OPEN(vp, FREAD, NOCRED);
3042 if (error) {
3043 /* Not quite a 'whatever'. In
3044 * this situation we know
3045 * there is a FS_RAID
3046 * partition, but we can't
3047 * open it. The most likely
3048 * reason is that the
3049 * partition is already in
3050 * use by another RAID set.
3051 * So note that we've already
3052 * found a partition on this
3053 * disk so we don't attempt
3054 * to use the raw disk later. */
3055 rf_part_found = 1;
3056 vput(vp);
3057 continue;
3058 }
3059 VOP_UNLOCK(vp);
3060 snprintf(cname, sizeof(cname), "%s%c",
3061 device_xname(dv), 'a' + i);
3062 ac_list = rf_get_component(ac_list, dev, vp, cname,
3063 label.d_partitions[i].p_size, numsecs, secsize);
3064 				rf_part_found = 1; /* There is at least one raid partition on this disk */
3065 }
3066
3067 /*
3068 			 * If there is no raid component on this disk, either in a
3069 			 * disklabel or inside a wedge, check the raw partition as well,
3070 			 * as it is possible to configure raid components on raw disk
3071 			 * devices.
3072 */
3073
3074 if (!rf_part_found) {
3075 char cname[sizeof(ac_list->devname)];
3076
3077 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3078 if (bdevvp(dev, &vp))
3079 panic("RAID can't alloc vnode");
3080
3081 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3082
3083 error = VOP_OPEN(vp, FREAD, NOCRED);
3084 if (error) {
3085 /* Whatever... */
3086 vput(vp);
3087 continue;
3088 }
3089 VOP_UNLOCK(vp);
3090 snprintf(cname, sizeof(cname), "%s%c",
3091 device_xname(dv), 'a' + RAW_PART);
3092 ac_list = rf_get_component(ac_list, dev, vp, cname,
3093 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3094 }
3095 }
3096 deviter_release(&di);
3097 }
3098 return ac_list;
3099 }
3100
3101 int
3102 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3103 {
3104
3105 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
3106 clabel->version==RF_COMPONENT_LABEL_VERSION ||
3107 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
3108 (clabel->clean == RF_RAID_CLEAN ||
3109 clabel->clean == RF_RAID_DIRTY) &&
3110 clabel->row >=0 &&
3111 clabel->column >= 0 &&
3112 clabel->num_rows > 0 &&
3113 clabel->num_columns > 0 &&
3114 clabel->row < clabel->num_rows &&
3115 clabel->column < clabel->num_columns &&
3116 clabel->blockSize > 0 &&
3117 /*
3118 * numBlocksHi may contain garbage, but it is ok since
3119 * the type is unsigned. If it is really garbage,
3120 * rf_fix_old_label_size() will fix it.
3121 */
3122 rf_component_label_numblocks(clabel) > 0) {
3123 /*
3124 * label looks reasonable enough...
3125 * let's make sure it has no old garbage.
3126 */
3127 if (numsecs)
3128 rf_fix_old_label_size(clabel, numsecs);
3129 return(1);
3130 }
3131 return(0);
3132 }
3133
3134
3135 /*
3136 * For reasons yet unknown, some old component labels have garbage in
3137 * the newer numBlocksHi region, and this causes lossage. Since those
3138 * disks will also have numsecs set to less than 32 bits of sectors,
3139 * we can determine when this corruption has occurred, and fix it.
3140 *
3141 * The exact same problem, with the same unknown reason, happens to
3142 * the partitionSizeHi member as well.
3143 */
3144 static void
3145 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3146 {
3147
3148 if (numsecs < ((uint64_t)1 << 32)) {
3149 if (clabel->numBlocksHi) {
3150 printf("WARNING: total sectors < 32 bits, yet "
3151 "numBlocksHi set\n"
3152 "WARNING: resetting numBlocksHi to zero.\n");
3153 clabel->numBlocksHi = 0;
3154 }
3155
3156 if (clabel->partitionSizeHi) {
3157 printf("WARNING: total sectors < 32 bits, yet "
3158 "partitionSizeHi set\n"
3159 "WARNING: resetting partitionSizeHi to zero.\n");
3160 clabel->partitionSizeHi = 0;
3161 }
3162 }
3163 }
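/*
 * Example: a component of 2097152 sectors (1 GB at 512 bytes/sector)
 * fits easily in 32 bits, so a non-zero numBlocksHi or
 * partitionSizeHi on such a disk can only be stale garbage, and is
 * cleared here.
 */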
3164
3165
3166 #ifdef DEBUG
3167 void
3168 rf_print_component_label(RF_ComponentLabel_t *clabel)
3169 {
3170 uint64_t numBlocks;
3171 static const char *rp[] = {
3172 "No", "Force", "Soft", "*invalid*"
3173 };
3174
3175
3176 numBlocks = rf_component_label_numblocks(clabel);
3177
3178 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3179 clabel->row, clabel->column,
3180 clabel->num_rows, clabel->num_columns);
3181 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3182 clabel->version, clabel->serial_number,
3183 clabel->mod_counter);
3184 printf(" Clean: %s Status: %d\n",
3185 clabel->clean ? "Yes" : "No", clabel->status);
3186 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3187 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3188 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3189 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3190 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3191 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3192 printf(" Last configured as: raid%d\n", clabel->last_unit);
3193 #if 0
3194 printf(" Config order: %d\n", clabel->config_order);
3195 #endif
3196
3197 }
3198 #endif
3199
3200 static RF_ConfigSet_t *
3201 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3202 {
3203 RF_AutoConfig_t *ac;
3204 RF_ConfigSet_t *config_sets;
3205 RF_ConfigSet_t *cset;
3206 RF_AutoConfig_t *ac_next;
3207
3208
3209 config_sets = NULL;
3210
3211 /* Go through the AutoConfig list, and figure out which components
3212 belong to what sets. */
3213 ac = ac_list;
3214 while(ac!=NULL) {
3215 /* we're going to putz with ac->next, so save it here
3216 for use at the end of the loop */
3217 ac_next = ac->next;
3218
3219 if (config_sets == NULL) {
3220 /* will need at least this one... */
3221 config_sets = malloc(sizeof(RF_ConfigSet_t),
3222 M_RAIDFRAME, M_WAITOK);
3223 /* this one is easy :) */
3224 config_sets->ac = ac;
3225 config_sets->next = NULL;
3226 config_sets->rootable = 0;
3227 ac->next = NULL;
3228 } else {
3229 /* which set does this component fit into? */
3230 cset = config_sets;
3231 while(cset!=NULL) {
3232 if (rf_does_it_fit(cset, ac)) {
3233 /* looks like it matches... */
3234 ac->next = cset->ac;
3235 cset->ac = ac;
3236 break;
3237 }
3238 cset = cset->next;
3239 }
3240 if (cset==NULL) {
3241 /* didn't find a match above... new set..*/
3242 cset = malloc(sizeof(RF_ConfigSet_t),
3243 M_RAIDFRAME, M_WAITOK);
3244 cset->ac = ac;
3245 ac->next = NULL;
3246 cset->next = config_sets;
3247 cset->rootable = 0;
3248 config_sets = cset;
3249 }
3250 }
3251 ac = ac_next;
3252 }
3253
3254
3255 return(config_sets);
3256 }
3257
3258 static int
3259 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3260 {
3261 RF_ComponentLabel_t *clabel1, *clabel2;
3262
3263 /* If this one matches the *first* one in the set, that's good
3264 enough, since the other members of the set would have been
3265 through here too... */
3266 /* note that we are not checking partitionSize here..
3267
3268 Note that we are also not checking the mod_counters here.
3269 If everything else matches except the mod_counter, that's
3270 good enough for this test. We will deal with the mod_counters
3271 a little later in the autoconfiguration process.
3272
3273 (clabel1->mod_counter == clabel2->mod_counter) &&
3274
3275 The reason we don't check for this is that failed disks
3276 will have lower modification counts. If those disks are
3277 not added to the set they used to belong to, then they will
3278 form their own set, which may result in 2 different sets,
3279 for example, competing to be configured at raid0, and
3280 perhaps competing to be the root filesystem set. If the
3281 wrong ones get configured, or both attempt to become /,
3282 	   weird behaviour and/or serious lossage will occur. Thus we
3283 need to bring them into the fold here, and kick them out at
3284 a later point.
3285
3286 */
3287
3288 clabel1 = cset->ac->clabel;
3289 clabel2 = ac->clabel;
3290 if ((clabel1->version == clabel2->version) &&
3291 (clabel1->serial_number == clabel2->serial_number) &&
3292 (clabel1->num_rows == clabel2->num_rows) &&
3293 (clabel1->num_columns == clabel2->num_columns) &&
3294 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3295 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3296 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3297 (clabel1->parityConfig == clabel2->parityConfig) &&
3298 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3299 (clabel1->blockSize == clabel2->blockSize) &&
3300 rf_component_label_numblocks(clabel1) ==
3301 rf_component_label_numblocks(clabel2) &&
3302 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3303 (clabel1->root_partition == clabel2->root_partition) &&
3304 (clabel1->last_unit == clabel2->last_unit) &&
3305 (clabel1->config_order == clabel2->config_order)) {
3306 		/* if it gets here, it almost *has* to be a match */
3307 } else {
3308 /* it's not consistent with somebody in the set..
3309 punt */
3310 return(0);
3311 }
3312 /* all was fine.. it must fit... */
3313 return(1);
3314 }
3315
3316 static int
3317 rf_have_enough_components(RF_ConfigSet_t *cset)
3318 {
3319 RF_AutoConfig_t *ac;
3320 RF_AutoConfig_t *auto_config;
3321 RF_ComponentLabel_t *clabel;
3322 int c;
3323 int num_cols;
3324 int num_missing;
3325 int mod_counter;
3326 int mod_counter_found;
3327 int even_pair_failed;
3328 char parity_type;
3329
3330
3331 /* check to see that we have enough 'live' components
3332 of this set. If so, we can configure it if necessary */
3333
3334 num_cols = cset->ac->clabel->num_columns;
3335 parity_type = cset->ac->clabel->parityConfig;
3336
3337 /* XXX Check for duplicate components!?!?!? */
3338
3339 /* Determine what the mod_counter is supposed to be for this set. */
3340
3341 mod_counter_found = 0;
3342 mod_counter = 0;
3343 ac = cset->ac;
3344 while(ac!=NULL) {
3345 if (mod_counter_found==0) {
3346 mod_counter = ac->clabel->mod_counter;
3347 mod_counter_found = 1;
3348 } else {
3349 if (ac->clabel->mod_counter > mod_counter) {
3350 mod_counter = ac->clabel->mod_counter;
3351 }
3352 }
3353 ac = ac->next;
3354 }
3355
3356 num_missing = 0;
3357 auto_config = cset->ac;
3358
3359 even_pair_failed = 0;
3360 for(c=0; c<num_cols; c++) {
3361 ac = auto_config;
3362 while(ac!=NULL) {
3363 if ((ac->clabel->column == c) &&
3364 (ac->clabel->mod_counter == mod_counter)) {
3365 /* it's this one... */
3366 #ifdef DEBUG
3367 printf("Found: %s at %d\n",
3368 ac->devname,c);
3369 #endif
3370 break;
3371 }
3372 ac=ac->next;
3373 }
3374 if (ac==NULL) {
3375 /* Didn't find one here! */
3376 /* special case for RAID 1, especially
3377 where there are more than 2
3378 components (where RAIDframe treats
3379 things a little differently :( ) */
3380 if (parity_type == '1') {
3381 if (c%2 == 0) { /* even component */
3382 even_pair_failed = 1;
3383 } else { /* odd component. If
3384 we're failed, and
3385 so is the even
3386 component, it's
3387 "Good Night, Charlie" */
3388 if (even_pair_failed == 1) {
3389 return(0);
3390 }
3391 }
3392 } else {
3393 /* normal accounting */
3394 num_missing++;
3395 }
3396 }
3397 if ((parity_type == '1') && (c%2 == 1)) {
3398 /* Just did an even component, and we didn't
3399 bail.. reset the even_pair_failed flag,
3400 and go on to the next component.... */
3401 even_pair_failed = 0;
3402 }
3403 }
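	/*
	 * Example of the RAID-1 pairing rule above (hypothetical
	 * 4-column set): columns 0+1 and 2+3 form mirror pairs.
	 * Missing components 0 and 2 is survivable, since one member
	 * of each pair remains; missing 0 and 1 loses an entire pair
	 * and the set cannot be configured.
	 */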
3404
3405 clabel = cset->ac->clabel;
3406
3407 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3408 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3409 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3410 /* XXX this needs to be made *much* more general */
3411 /* Too many failures */
3412 return(0);
3413 }
3414 /* otherwise, all is well, and we've got enough to take a kick
3415 at autoconfiguring this set */
3416 return(1);
3417 }
3418
3419 static void
3420 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3421 RF_Raid_t *raidPtr)
3422 {
3423 RF_ComponentLabel_t *clabel;
3424 int i;
3425
3426 clabel = ac->clabel;
3427
3428 /* 1. Fill in the common stuff */
3429 config->numCol = clabel->num_columns;
3430 config->numSpare = 0; /* XXX should this be set here? */
3431 config->sectPerSU = clabel->sectPerSU;
3432 config->SUsPerPU = clabel->SUsPerPU;
3433 config->SUsPerRU = clabel->SUsPerRU;
3434 config->parityConfig = clabel->parityConfig;
3435 /* XXX... */
3436 strcpy(config->diskQueueType,"fifo");
3437 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3438 config->layoutSpecificSize = 0; /* XXX ?? */
3439
3440 while(ac!=NULL) {
3441 /* row/col values will be in range due to the checks
3442 in reasonable_label() */
3443 strcpy(config->devnames[0][ac->clabel->column],
3444 ac->devname);
3445 ac = ac->next;
3446 }
3447
3448 for(i=0;i<RF_MAXDBGV;i++) {
3449 config->debugVars[i][0] = 0;
3450 }
3451 }
3452
3453 static int
3454 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3455 {
3456 RF_ComponentLabel_t *clabel;
3457 int column;
3458 int sparecol;
3459
3460 raidPtr->autoconfigure = new_value;
3461
3462 for(column=0; column<raidPtr->numCol; column++) {
3463 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3464 clabel = raidget_component_label(raidPtr, column);
3465 clabel->autoconfigure = new_value;
3466 raidflush_component_label(raidPtr, column);
3467 }
3468 }
3469 for(column = 0; column < raidPtr->numSpare ; column++) {
3470 sparecol = raidPtr->numCol + column;
3471 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3472 clabel = raidget_component_label(raidPtr, sparecol);
3473 clabel->autoconfigure = new_value;
3474 raidflush_component_label(raidPtr, sparecol);
3475 }
3476 }
3477 return(new_value);
3478 }
3479
3480 static int
3481 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3482 {
3483 RF_ComponentLabel_t *clabel;
3484 int column;
3485 int sparecol;
3486
3487 raidPtr->root_partition = new_value;
3488 for(column=0; column<raidPtr->numCol; column++) {
3489 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3490 clabel = raidget_component_label(raidPtr, column);
3491 clabel->root_partition = new_value;
3492 raidflush_component_label(raidPtr, column);
3493 }
3494 }
3495 for(column = 0; column < raidPtr->numSpare ; column++) {
3496 sparecol = raidPtr->numCol + column;
3497 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3498 clabel = raidget_component_label(raidPtr, sparecol);
3499 clabel->root_partition = new_value;
3500 raidflush_component_label(raidPtr, sparecol);
3501 }
3502 }
3503 return(new_value);
3504 }
3505
3506 static void
3507 rf_release_all_vps(RF_ConfigSet_t *cset)
3508 {
3509 RF_AutoConfig_t *ac;
3510
3511 ac = cset->ac;
3512 while(ac!=NULL) {
3513 /* Close the vp, and give it back */
3514 if (ac->vp) {
3515 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3516 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3517 vput(ac->vp);
3518 ac->vp = NULL;
3519 }
3520 ac = ac->next;
3521 }
3522 }
3523
3524
3525 static void
3526 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3527 {
3528 RF_AutoConfig_t *ac;
3529 RF_AutoConfig_t *next_ac;
3530
3531 ac = cset->ac;
3532 while(ac!=NULL) {
3533 next_ac = ac->next;
3534 /* nuke the label */
3535 free(ac->clabel, M_RAIDFRAME);
3536 /* cleanup the config structure */
3537 free(ac, M_RAIDFRAME);
3538 /* "next.." */
3539 ac = next_ac;
3540 }
3541 /* and, finally, nuke the config set */
3542 free(cset, M_RAIDFRAME);
3543 }
3544
3545
3546 void
3547 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3548 {
3549 /* avoid over-writing byteswapped version. */
3550 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
3551 clabel->version = RF_COMPONENT_LABEL_VERSION;
3552 clabel->serial_number = raidPtr->serial_number;
3553 clabel->mod_counter = raidPtr->mod_counter;
3554
3555 clabel->num_rows = 1;
3556 clabel->num_columns = raidPtr->numCol;
3557 clabel->clean = RF_RAID_DIRTY; /* not clean */
3558 clabel->status = rf_ds_optimal; /* "It's good!" */
3559
3560 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3561 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3562 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3563
3564 clabel->blockSize = raidPtr->bytesPerSector;
3565 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3566
3567 /* XXX not portable */
3568 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3569 clabel->maxOutstanding = raidPtr->maxOutstanding;
3570 clabel->autoconfigure = raidPtr->autoconfigure;
3571 clabel->root_partition = raidPtr->root_partition;
3572 clabel->last_unit = raidPtr->raidid;
3573 clabel->config_order = raidPtr->config_order;
3574
3575 #ifndef RF_NO_PARITY_MAP
3576 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3577 #endif
3578 }
3579
3580 static struct raid_softc *
3581 rf_auto_config_set(RF_ConfigSet_t *cset)
3582 {
3583 RF_Raid_t *raidPtr;
3584 RF_Config_t *config;
3585 int raidID;
3586 struct raid_softc *sc;
3587
3588 #ifdef DEBUG
3589 printf("RAID autoconfigure\n");
3590 #endif
3591
3592 /* 1. Create a config structure */
3593 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3594
3595 /*
3596 2. Figure out what RAID ID this one is supposed to live at
3597 See if we can get the same RAID dev that it was configured
3598 on last time..
3599 */
3600
3601 raidID = cset->ac->clabel->last_unit;
3602 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3603 sc = raidget(++raidID, false))
3604 continue;
3605 #ifdef DEBUG
3606 printf("Configuring raid%d:\n",raidID);
3607 #endif
3608
3609 if (sc == NULL)
3610 sc = raidget(raidID, true);
3611 raidPtr = &sc->sc_r;
3612
3613 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3614 raidPtr->softc = sc;
3615 raidPtr->raidid = raidID;
3616 raidPtr->openings = RAIDOUTSTANDING;
3617
3618 /* 3. Build the configuration structure */
3619 rf_create_configuration(cset->ac, config, raidPtr);
3620
3621 /* 4. Do the configuration */
3622 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3623 raidinit(sc);
3624
3625 rf_markalldirty(raidPtr);
3626 raidPtr->autoconfigure = 1; /* XXX do this here? */
3627 switch (cset->ac->clabel->root_partition) {
3628 case 1: /* Force Root */
3629 case 2: /* Soft Root: root when boot partition part of raid */
3630 /*
3631 * everything configured just fine. Make a note
3632 * that this set is eligible to be root,
3633 * or forced to be root
3634 */
3635 cset->rootable = cset->ac->clabel->root_partition;
3636 /* XXX do this here? */
3637 raidPtr->root_partition = cset->rootable;
3638 break;
3639 default:
3640 break;
3641 }
3642 } else {
3643 raidput(sc);
3644 sc = NULL;
3645 }
3646
3647 /* 5. Cleanup */
3648 free(config, M_RAIDFRAME);
3649 return sc;
3650 }
3651
3652 void
3653 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3654 size_t xmin, size_t xmax)
3655 {
3656
3657 /* Format: raid%d_foo */
3658 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3659
3660 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3661 pool_sethiwat(p, xmax);
3662 pool_prime(p, xmin);
3663 }
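/*
 * Hypothetical usage sketch (the member names below are illustrative
 * only, not the actual RAIDframe pool fields):
 *
 *	rf_pool_init(raidPtr, raidPtr->poolNames.foo, &raidPtr->pools.foo,
 *	    sizeof(struct foo), "foo", RF_MIN_FREE_FOO, RF_MAX_FREE_FOO);
 *
 * The w_chan buffer must hold at least RF_MAX_POOLNAMELEN bytes,
 * since the formatted "raid%d_%s" name lives there for the pool's
 * lifetime and is used as the wait channel.
 */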
3664
3665
3666 /*
3667 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3668 * to see if there is IO pending and if that IO could possibly be done
3669 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3670 * otherwise.
3671 *
3672 */
3673 int
3674 rf_buf_queue_check(RF_Raid_t *raidPtr)
3675 {
3676 struct raid_softc *rs;
3677 struct dk_softc *dksc;
3678
3679 rs = raidPtr->softc;
3680 dksc = &rs->sc_dksc;
3681
3682 if ((rs->sc_flags & RAIDF_INITED) == 0)
3683 return 1;
3684
3685 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3686 /* there is work to do */
3687 return 0;
3688 }
3689 /* default is nothing to do */
3690 return 1;
3691 }
3692
3693 int
3694 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3695 {
3696 uint64_t numsecs;
3697 unsigned secsize;
3698 int error;
3699
3700 error = getdisksize(vp, &numsecs, &secsize);
3701 if (error == 0) {
3702 diskPtr->blockSize = secsize;
3703 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3704 diskPtr->partitionSize = numsecs;
3705 return 0;
3706 }
3707 return error;
3708 }
3709
3710 static int
3711 raid_match(device_t self, cfdata_t cfdata, void *aux)
3712 {
3713 return 1;
3714 }
3715
3716 static void
3717 raid_attach(device_t parent, device_t self, void *aux)
3718 {
3719 }
3720
3721
3722 static int
3723 raid_detach(device_t self, int flags)
3724 {
3725 int error;
3726 struct raid_softc *rs = raidsoftc(self);
3727
3728 if (rs == NULL)
3729 return ENXIO;
3730
3731 if ((error = raidlock(rs)) != 0)
3732 return error;
3733
3734 error = raid_detach_unlocked(rs);
3735
3736 raidunlock(rs);
3737
3738 /* XXX raid can be referenced here */
3739
3740 if (error)
3741 return error;
3742
3743 /* Free the softc */
3744 raidput(rs);
3745
3746 return 0;
3747 }
3748
3749 static void
3750 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3751 {
3752 struct dk_softc *dksc = &rs->sc_dksc;
3753 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3754
3755 memset(dg, 0, sizeof(*dg));
3756
3757 dg->dg_secperunit = raidPtr->totalSectors;
3758 dg->dg_secsize = raidPtr->bytesPerSector;
3759 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3760 dg->dg_ntracks = 4 * raidPtr->numCol;
3761
3762 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3763 }
3764
3765 /*
3766 * Get cache info for all the components (including spares).
3767 * Returns intersection of all the cache flags of all disks, or first
3768 * error if any encountered.
3769 * XXXfua feature flags can change as spares are added - lock down somehow
3770 */
3771 static int
3772 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3773 {
3774 int c;
3775 int error;
3776 int dkwhole = 0, dkpart;
3777
3778 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3779 /*
3780 * Check any non-dead disk, even when currently being
3781 * reconstructed.
3782 */
3783 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3784 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3785 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3786 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3787 if (error) {
3788 if (error != ENODEV) {
3789 printf("raid%d: get cache for component %s failed\n",
3790 raidPtr->raidid,
3791 raidPtr->Disks[c].devname);
3792 }
3793
3794 return error;
3795 }
3796
3797 if (c == 0)
3798 dkwhole = dkpart;
3799 else
3800 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3801 }
3802 }
3803
3804 *data = dkwhole;
3805
3806 return 0;
3807 }
3808
3809 /*
3810 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3811 * We end up returning whatever error was returned by the first cache flush
3812 * that fails.
3813 */
3814
3815 static int
3816 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3817 {
3818 int e = 0;
3819 for (int i = 0; i < 5; i++) {
3820 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3821 &force, FWRITE, NOCRED);
3822 if (!e || e == ENODEV)
3823 return e;
3824 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3825 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3826 }
3827 return e;
3828 }
3829
3830 int
3831 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3832 {
3833 int c, error;
3834
3835 error = 0;
3836 for (c = 0; c < raidPtr->numCol; c++) {
3837 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3838 int e = rf_sync_component_cache(raidPtr, c, force);
3839 if (e && !error)
3840 error = e;
3841 }
3842 }
3843
3844 for (c = 0; c < raidPtr->numSpare ; c++) {
3845 int sparecol = raidPtr->numCol + c;
3846 /* Need to ensure that the reconstruct actually completed! */
3847 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3848 int e = rf_sync_component_cache(raidPtr, sparecol,
3849 force);
3850 if (e && !error)
3851 error = e;
3852 }
3853 }
3854 return error;
3855 }
3856
3857 /* Fill in info with the current status */
3858 void
3859 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3860 {
3861
3862 if (raidPtr->status != rf_rs_reconstructing) {
3863 info->total = 100;
3864 info->completed = 100;
3865 } else {
3866 info->total = raidPtr->reconControl->numRUsTotal;
3867 info->completed = raidPtr->reconControl->numRUsComplete;
3868 }
3869 info->remaining = info->total - info->completed;
3870 }
3871
3872 /* Fill in info with the current status */
3873 void
3874 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3875 {
3876
3877 if (raidPtr->parity_rewrite_in_progress == 1) {
3878 info->total = raidPtr->Layout.numStripe;
3879 info->completed = raidPtr->parity_rewrite_stripes_done;
3880 } else {
3881 info->completed = 100;
3882 info->total = 100;
3883 }
3884 info->remaining = info->total - info->completed;
3885 }
3886
3887 /* Fill in info with the current status */
3888 void
3889 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3890 {
3891
3892 if (raidPtr->copyback_in_progress == 1) {
3893 info->total = raidPtr->Layout.numStripe;
3894 info->completed = raidPtr->copyback_stripes_done;
3895 info->remaining = info->total - info->completed;
3896 } else {
3897 info->remaining = 0;
3898 info->completed = 100;
3899 info->total = 100;
3900 }
3901 }
3902
3903 /* Fill in config with the current info */
3904 int
3905 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3906 {
3907 int d, i, j;
3908
3909 if (!raidPtr->valid)
3910 return ENODEV;
3911 config->cols = raidPtr->numCol;
3912 config->ndevs = raidPtr->numCol;
3913 if (config->ndevs >= RF_MAX_DISKS)
3914 return ENOMEM;
3915 config->nspares = raidPtr->numSpare;
3916 if (config->nspares >= RF_MAX_DISKS)
3917 return ENOMEM;
3918 config->maxqdepth = raidPtr->maxQueueDepth;
3919 d = 0;
3920 for (j = 0; j < config->cols; j++) {
3921 config->devs[d] = raidPtr->Disks[j];
3922 d++;
3923 }
3924 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3925 config->spares[i] = raidPtr->Disks[j];
3926 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3927 /* XXX: raidctl(8) expects to see this as a used spare */
3928 config->spares[i].status = rf_ds_used_spare;
3929 }
3930 }
3931 return 0;
3932 }
3933
3934 int
3935 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3936 {
3937 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3938 RF_ComponentLabel_t *raid_clabel;
3939 int column = clabel->column;
3940
3941 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3942 return EINVAL;
3943 raid_clabel = raidget_component_label(raidPtr, column);
3944 memcpy(clabel, raid_clabel, sizeof *clabel);
3945 /* Fix-up for userland. */
3946 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
3947 clabel->version = RF_COMPONENT_LABEL_VERSION;
3948
3949 return 0;
3950 }
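/*
 * Illustrative sketch, with the ioctl name assumed from the RAIDframe
 * ioctl family rather than quoted from this file: a userland caller
 * selects the column first, then issues the request.
 *
 *	RF_ComponentLabel_t cl;
 *
 *	(void)memset(&cl, 0, sizeof(cl));
 *	cl.column = 0;
 *	if (ioctl(fd, RAIDFRAME_GET_COMPONENT_LABEL, &cl) == -1)
 *		err(1, "RAIDFRAME_GET_COMPONENT_LABEL");
 *
 * Thanks to the version fix-up above, the caller sees the native
 * RF_COMPONENT_LABEL_VERSION even for a byte-swapped on-disk label.
 */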
3951
3952 /*
3953 * Module interface
3954 */
3955
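/*
 * Declare the module and its prerequisites: the generic disk helpers
 * (dk_subr) and the FCFS buffer queue strategy (bufq_fcfs) must be
 * available before raid can load.
 */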
3956 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3957
3958 #ifdef _MODULE
3959 CFDRIVER_DECL(raid, DV_DISK, NULL);
3960 #endif
3961
3962 static int raid_modcmd(modcmd_t, void *);
3963 static int raid_modcmd_init(void);
3964 static int raid_modcmd_fini(void);
3965
3966 static int
3967 raid_modcmd(modcmd_t cmd, void *data)
3968 {
3969 int error;
3970
3971 error = 0;
3972 switch (cmd) {
3973 case MODULE_CMD_INIT:
3974 error = raid_modcmd_init();
3975 break;
3976 case MODULE_CMD_FINI:
3977 error = raid_modcmd_fini();
3978 break;
3979 default:
3980 error = ENOTTY;
3981 break;
3982 }
3983 return error;
3984 }
3985
3986 static int
3987 raid_modcmd_init(void)
3988 {
3989 int error;
3990 int bmajor, cmajor;
3991
3992 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
3993 mutex_enter(&raid_lock);
3994 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3995 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
3996 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
3997 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
3998
3999 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
4000 #endif
4001
4002 bmajor = cmajor = -1;
4003 error = devsw_attach("raid", &raid_bdevsw, &bmajor,
4004 &raid_cdevsw, &cmajor);
4005 if (error != 0 && error != EEXIST) {
4006 aprint_error("%s: devsw_attach failed %d\n", __func__, error);
4007 mutex_exit(&raid_lock);
4008 return error;
4009 }
4010 #ifdef _MODULE
4011 error = config_cfdriver_attach(&raid_cd);
4012 if (error != 0) {
4013 aprint_error("%s: config_cfdriver_attach failed %d\n",
4014 __func__, error);
4015 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4016 mutex_exit(&raid_lock);
4017 return error;
4018 }
4019 #endif
4020 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4021 if (error != 0) {
4022 aprint_error("%s: config_cfattach_attach failed %d\n",
4023 __func__, error);
4024 #ifdef _MODULE
4025 config_cfdriver_detach(&raid_cd);
4026 #endif
4027 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4028 mutex_exit(&raid_lock);
4029 return error;
4030 }
4031
4032 raidautoconfigdone = false;
4033
4034 mutex_exit(&raid_lock);
4035
4036 if (error == 0) {
4037 if (rf_BootRaidframe(true) == 0)
4038 aprint_verbose("Kernelized RAIDframe activated\n");
4039 else
4040 panic("Serious error activating RAID!!");
4041 }
4042
4043 /*
4044 * Register a finalizer which will be used to auto-config RAID
4045 * sets once all real hardware devices have been found.
4046 */
4047 error = config_finalize_register(NULL, rf_autoconfig);
4048 if (error != 0) {
4049 aprint_error("WARNING: unable to register RAIDframe "
4050 "finalizer\n");
4051 error = 0;
4052 }
4053
4054 return error;
4055 }
4056
4057 static int
4058 raid_modcmd_fini(void)
4059 {
4060 int error;
4061
4062 mutex_enter(&raid_lock);
4063
4064 /* Don't allow unload if raid device(s) exist. */
4065 if (!LIST_EMPTY(&raids)) {
4066 mutex_exit(&raid_lock);
4067 return EBUSY;
4068 }
4069
4070 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
4071 if (error != 0) {
4072 aprint_error("%s: cannot detach cfattach\n",__func__);
4073 mutex_exit(&raid_lock);
4074 return error;
4075 }
4076 #ifdef _MODULE
4077 error = config_cfdriver_detach(&raid_cd);
4078 if (error != 0) {
4079 aprint_error("%s: cannot detach cfdriver\n",__func__);
4080 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4081 mutex_exit(&raid_lock);
4082 return error;
4083 }
4084 #endif
4085 error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
4086 if (error != 0) {
4087 aprint_error("%s: cannot detach devsw\n",__func__);
4088 #ifdef _MODULE
4089 config_cfdriver_attach(&raid_cd);
4090 #endif
4091 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4092 mutex_exit(&raid_lock);
4093 return error;
4094 }
4095 rf_BootRaidframe(false);
4096 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
4097 rf_destroy_mutex2(rf_sparet_wait_mutex);
4098 rf_destroy_cond2(rf_sparet_wait_cv);
4099 rf_destroy_cond2(rf_sparet_resp_cv);
4100 #endif
4101 mutex_exit(&raid_lock);
4102 mutex_destroy(&raid_lock);
4103
4104 return error;
4105 }
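/*
 * Note on the teardown above: raid_modcmd_fini() unwinds in the exact
 * reverse order of raid_modcmd_init() (cfattach, cfdriver, devsw, then
 * the RAIDframe core), and each failure path re-attaches whatever was
 * already detached so a half-unloaded module is never left behind.
 * From userland this follows the standard module(9) life cycle; e.g.
 * (assumed standard tooling, nothing specific to this file):
 *
 *	modload raid		runs raid_modcmd_init()
 *	modunload raid		fails with EBUSY while any raid exists
 */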
4106