/*	$NetBSD: rf_netbsdkintf.c,v 1.411 2023/03/30 11:02:15 riastradh Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.411 2023/03/30 11:02:15 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

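/*
 * Buf flags from the original request that are allowed to pass through
 * to the component I/O buffers (see the InitBP() calls below).
 */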
const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

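/* Block and character device switch entry points. */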
static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

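/*
 * raidunit() extracts the unit number from a dev_t; raidsoftc() recovers
 * the raid_softc backpointer from an attached device_t.
 */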
#define raidunit(x)	DISKUNIT(x)
#define raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;
	RF_ReconReqFlags_t flags;
	void   *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static int rf_rescan(void);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;	/* AllocList */

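/* raid_lock protects the global list of configured units below. */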
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

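/* Look up the softc for `unit', optionally creating and listing it. */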
static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	sc = raidcreate(unit);
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
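		/* skip the leading "/dev/" to compare bare device names */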
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

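/*
 * Scan the system for new RAID components and configure any complete,
 * autoconfigurable sets found.  The scan repeats as long as it adds a
 * set, so that RAID sets stacked on other RAID sets are picked up.
 */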
static int
rf_rescan(void)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets, *cset, *next_cset;
	struct raid_softc *sc;
	int raid_added;

	ac_list = rf_find_raid_components();
	config_sets = rf_create_auto_sets(ac_list);

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	return 0;
}

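/*
 * Configure all autoconfigurable sets (as in rf_rescan() above), then
 * try to work out which configured set, if any, holds the root file
 * system, and point booted_device at it.
 */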
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		aprint_debug("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			aprint_debug("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			aprint_debug("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		aprint_debug("%s: candidate root=%p booted_device=%p "
		    "root_partition=%d contains_boot=%d\n",
		    __func__, candidate_root, booted_device,
		    rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			aprint_debug("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		aprint_debug("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;
}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set;
	   mark things as clean here... */
895
896 rf_update_component_labels(&rs->sc_r,
897 RF_FINAL_COMPONENT_UPDATE);
898
899 /* pass to unlocked code */
900 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
901 rs->sc_flags |= RAIDF_DETACH;
902
903 return 0;
904 }
905
906 /* ARGSUSED */
907 static int
908 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
909 {
910 int unit = raidunit(dev);
911 struct raid_softc *rs;
912 struct dk_softc *dksc;
913 cfdata_t cf;
914 int error = 0, do_detach = 0, do_put = 0;
915
916 if ((rs = raidget(unit, false)) == NULL)
917 return ENXIO;
918 dksc = &rs->sc_dksc;
919
920 if ((error = raidlock(rs)) != 0)
921 return error;
922
923 if ((rs->sc_flags & RAIDF_INITED) != 0) {
924 error = dk_close(dksc, dev, flags, fmt, l);
925 if ((rs->sc_flags & RAIDF_DETACH) != 0)
926 do_detach = 1;
927 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
928 do_put = 1;
929
930 raidunlock(rs);
931
932 if (do_detach) {
933 /* free the pseudo device attach bits */
934 cf = device_cfdata(dksc->sc_dev);
935 error = config_detach(dksc->sc_dev, 0);
936 if (error == 0)
937 free(cf, M_RAIDFRAME);
938 } else if (do_put) {
939 raidput(rs);
940 }
941
942 return error;
943
944 }
945
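/* Wake whoever is sleeping on iodone_cv so queued I/O gets scheduled. */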
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
	/* allocate a buffer for the layout-specific data, and copy it in */
	if (k_cfg->layoutSpecificSize == 0)
		return 0;

	if (k_cfg->layoutSpecificSize > 10000) {
		/* sanity check */
		return EINVAL;
	}

	u_char *specific_buf;
	specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
	if (specific_buf == NULL)
		return ENOMEM;

	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
	    k_cfg->layoutSpecificSize);
	if (retcode) {
		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
		return retcode;
	}

	k_cfg->layoutSpecific = specific_buf;
	return 0;
}

static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit! */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}

int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode, i;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* Force nul-termination on all strings. */
#define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
	for (i = 0; i < RF_MAXCOL; i++) {
		ZERO_FINAL(k_cfg->devnames[0][i]);
	}
	for (i = 0; i < RF_MAXSPARE; i++) {
		ZERO_FINAL(k_cfg->spare_names[i]);
	}
	for (i = 0; i < RF_MAXDBGV; i++) {
		ZERO_FINAL(k_cfg->debugVars[i]);
	}
#undef ZERO_FINAL

	/* Check some basic limits. */
	if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
		retcode = EINVAL;
		goto out;
	}
	if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
		retcode = EINVAL;
		goto out;
	}

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}

#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	*/

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
	/*
	 * This makes no sense on a RAID 0, or if we are not reconstructing
	 * so tell the user it's done.
	 */
	if (raidPtr->Layout.map->faultsTolerated == 0 ||
	    raidPtr->status != rf_rs_reconstructing) {
		*data = 100;
		return 0;
	}
	if (raidPtr->reconControl->numRUsTotal == 0) {
		*data = 0;
		return 0;
	}
	*data = (raidPtr->reconControl->numRUsComplete * 100
	    / raidPtr->reconControl->numRUsTotal);
	return 0;
}

/*
 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
 * on the component_name[] array.
 */
static void
rf_copy_single_component(RF_SingleComponent_t *component, void *data)
{

	memcpy(component, data, sizeof *component);
	component->component_name[sizeof(component->component_name) - 1] = '\0';
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Only CONFIGURE and RESCAN can be done without the RAID being initialized. */
	switch (cmd) {
	case RAIDFRAME_CONFIGURE:
	case RAIDFRAME_RESCAN:
		break;
	default:
		if (!rf_inited(rs))
			return ENXIO;
	}

	switch (cmd) {
		/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr, "raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		rf_copy_single_component(&component, data);
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		rf_copy_single_component(&component, data);
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		rf_copy_single_component(&component, data);
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		ucfgp = *(RF_DeviceConfig_t **)data;
		d_cfg = RF_Malloc(sizeof(*d_cfg));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT;	/* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESCAN:
		return rf_rescan();

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return retcode;
#endif
	default:
		/*
		 * Don't bother trying to load compat modules
		 * if it is not our ioctl. This is more efficient
		 * and makes rump tests not depend on compat code
		 */
		if (IOCGROUP(cmd) != 'r')
			break;
#ifdef _LP64
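		/* 32-bit process on a 64-bit kernel: give the netbsd32
		 * compat hook first crack at the ioctl */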
		if ((l->l_proc->p_flag & PK_32) != 0) {
			module_autoload("compat_netbsd32_raid",
			    MODULE_CLASS_EXEC);
			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
			    (rs, cmd, data), enosys(), retcode);
			if (retcode != EPASSTHROUGH)
				return retcode;
		}
#endif
		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;

		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;
		break;	/* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return EINVAL;

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return retcode;
}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */

static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	dk_start(dksc, NULL);
}

static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int rc;

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
	    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

1989 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1990 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1991 sum = raid_addr + num_blocks + pb;
1992 	if (rf_debugKernelAccess) {
1993 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1994 (int) raid_addr, (int) sum, (int) num_blocks,
1995 (int) pb, (int) bp->b_resid));
1996 }
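	/* Bounds check: the request must end within the RAID set, and
	 * the unsigned sums must not have wrapped around. */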
1997 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1998 || (sum < num_blocks) || (sum < pb)) {
1999 rc = ENOSPC;
2000 goto done;
2001 }
2002 /*
2003 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2004 */
2005
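	/* Reject transfers that are not a multiple of the sector size. */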
2006 if (bp->b_bcount & raidPtr->sectorMask) {
2007 rc = ENOSPC;
2008 goto done;
2009 }
2010 	db1_printf(("Calling DoAccess..\n"));
2011 
2013 rf_lock_mutex2(raidPtr->mutex);
2014 raidPtr->openings--;
2015 rf_unlock_mutex2(raidPtr->mutex);
2016
2017 /* don't ever condition on bp->b_flags & B_WRITE.
2018 * always condition on B_READ instead */
2019
2020 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2021 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2022 raid_addr, num_blocks,
2023 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2024
2025 done:
2026 return rc;
2027 }
2028
2029 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2030
2031 int
2032 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2033 {
2034 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2035 struct buf *bp;
2036
2037 req->queue = queue;
2038 bp = req->bp;
2039
2040 switch (req->type) {
2041 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2042 /* XXX need to do something extra here.. */
2043 /* I'm leaving this in, as I've never actually seen it used,
2044 * and I'd like folks to report it... GO */
2045 printf("%s: WAKEUP CALLED\n", __func__);
2046 queue->numOutstanding++;
2047
2048 bp->b_flags = 0;
2049 bp->b_private = req;
2050
2051 KernelWakeupFunc(bp);
2052 break;
2053
2054 case RF_IO_TYPE_READ:
2055 case RF_IO_TYPE_WRITE:
2056 #if RF_ACC_TRACE > 0
2057 if (req->tracerec) {
2058 RF_ETIMER_START(req->tracerec->timer);
2059 }
2060 #endif
2061 InitBP(bp, queue->rf_cinfo->ci_vp,
2062 op, queue->rf_cinfo->ci_dev,
2063 req->sectorOffset, req->numSector,
2064 req->buf, KernelWakeupFunc, (void *) req,
2065 queue->raidPtr->logBytesPerSector);
2066
2067 if (rf_debugKernelAccess) {
2068 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2069 (long) bp->b_blkno));
2070 }
2071 queue->numOutstanding++;
2072 queue->last_deq_sector = req->sectorOffset;
2073 		/* the access wouldn't have been let in if there were any
2074 		 * pending reqs at any other priority */
2075 queue->curPriority = req->priority;
2076
2077 db1_printf(("Going for %c to unit %d col %d\n",
2078 req->type, queue->raidPtr->raidid,
2079 queue->col));
2080 db1_printf(("sector %d count %d (%d bytes) %d\n",
2081 (int) req->sectorOffset, (int) req->numSector,
2082 (int) (req->numSector <<
2083 queue->raidPtr->logBytesPerSector),
2084 (int) queue->raidPtr->logBytesPerSector));
2085
2086 /*
2087 * XXX: drop lock here since this can block at
2088 * least with backing SCSI devices. Retake it
2089 * to minimize fuss with calling interfaces.
2090 */
2091
2092 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2093 bdev_strategy(bp);
2094 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2095 break;
2096
2097 default:
2098 panic("bad req->type in rf_DispatchKernelIO");
2099 }
2100 db1_printf(("Exiting from DispatchKernelIO\n"));
2101
2102 return 0;
2103 }
2104 /* this is the callback function associated with an I/O invoked from
2105    kernel code.
2106  */
2107 static void
2108 KernelWakeupFunc(struct buf *bp)
2109 {
2110 RF_DiskQueueData_t *req = NULL;
2111 RF_DiskQueue_t *queue;
2112
2113 db1_printf(("recovering the request queue:\n"));
2114
2115 req = bp->b_private;
2116
2117 queue = (RF_DiskQueue_t *) req->queue;
2118
2119 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2120
2121 #if RF_ACC_TRACE > 0
2122 if (req->tracerec) {
2123 RF_ETIMER_STOP(req->tracerec->timer);
2124 RF_ETIMER_EVAL(req->tracerec->timer);
2125 rf_lock_mutex2(rf_tracing_mutex);
2126 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2127 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2128 req->tracerec->num_phys_ios++;
2129 rf_unlock_mutex2(rf_tracing_mutex);
2130 }
2131 #endif
2132
2133 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2134 * ballistic, and mark the component as hosed... */
2135
2136 if (bp->b_error != 0) {
2137 /* Mark the disk as dead */
2138 /* but only mark it once... */
2139 /* and only if it wouldn't leave this RAID set
2140 completely broken */
2141 if (((queue->raidPtr->Disks[queue->col].status ==
2142 rf_ds_optimal) ||
2143 (queue->raidPtr->Disks[queue->col].status ==
2144 rf_ds_used_spare)) &&
2145 (queue->raidPtr->numFailures <
2146 queue->raidPtr->Layout.map->faultsTolerated)) {
2147 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2148 queue->raidPtr->raidid,
2149 bp->b_error,
2150 queue->raidPtr->Disks[queue->col].devname);
2151 queue->raidPtr->Disks[queue->col].status =
2152 rf_ds_failed;
2153 queue->raidPtr->status = rf_rs_degraded;
2154 queue->raidPtr->numFailures++;
2155 queue->raidPtr->numNewFailures++;
2156 } else { /* Disk is already dead... */
2157 /* printf("Disk already marked as dead!\n"); */
2158 }
2159
2160 }
2161
2162 /* Fill in the error value */
2163 req->error = bp->b_error;
2164
2165 /* Drop this one on the "finished" queue... */
2166 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2167
2168 /* Let the raidio thread know there is work to be done. */
2169 rf_signal_cond2(queue->raidPtr->iodone_cv);
2170
2171 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2172 }
2173
2174
2175 /*
2176 * initialize a buf structure for doing an I/O in the kernel.
2177 */
2178 static void
2179 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2180 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2181 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2182 {
2183 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2184 bp->b_oflags = 0;
2185 bp->b_cflags = 0;
2186 bp->b_bcount = numSect << logBytesPerSector;
2187 bp->b_bufsize = bp->b_bcount;
2188 bp->b_error = 0;
2189 bp->b_dev = dev;
2190 bp->b_data = bf;
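	/* Convert the sector address to DEV_BSIZE units for the device. */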
2191 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2192 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2193 if (bp->b_bcount == 0) {
2194 panic("bp->b_bcount is zero in InitBP!!");
2195 }
2196 bp->b_iodone = cbFunc;
2197 bp->b_private = cbArg;
2198 }
2199
2200 /*
2201 * Wait interruptibly for an exclusive lock.
2202 *
2203 * XXX
2204 * Several drivers do this; it should be abstracted and made MP-safe.
2205 * (Hmm... where have we seen this warning before :-> GO )
2206 */
2207 static int
2208 raidlock(struct raid_softc *rs)
2209 {
2210 int error;
2211
2212 error = 0;
2213 mutex_enter(&rs->sc_mutex);
2214 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2215 rs->sc_flags |= RAIDF_WANTED;
2216 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2217 if (error != 0)
2218 goto done;
2219 }
2220 rs->sc_flags |= RAIDF_LOCKED;
2221 done:
2222 mutex_exit(&rs->sc_mutex);
2223 return error;
2224 }
2225 /*
2226 * Unlock and wake up any waiters.
2227 */
2228 static void
2229 raidunlock(struct raid_softc *rs)
2230 {
2231
2232 mutex_enter(&rs->sc_mutex);
2233 rs->sc_flags &= ~RAIDF_LOCKED;
2234 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2235 rs->sc_flags &= ~RAIDF_WANTED;
2236 cv_broadcast(&rs->sc_cv);
2237 }
2238 mutex_exit(&rs->sc_mutex);
2239 }
2240
2241
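/* On-disk metadata layout: the component label lives at a fixed byte
 * offset on each component, and the parity map follows it (see
 * rf_parity_map_offset() below); both areas are rounded up to at least
 * one full sector. */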
2242 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2243 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2244 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2245
2246 static daddr_t
2247 rf_component_info_offset(void)
2248 {
2249
2250 return RF_COMPONENT_INFO_OFFSET;
2251 }
2252
2253 static daddr_t
2254 rf_component_info_size(unsigned secsize)
2255 {
2256 daddr_t info_size;
2257
2258 KASSERT(secsize);
2259 if (secsize > RF_COMPONENT_INFO_SIZE)
2260 info_size = secsize;
2261 else
2262 info_size = RF_COMPONENT_INFO_SIZE;
2263
2264 return info_size;
2265 }
2266
2267 static daddr_t
2268 rf_parity_map_offset(RF_Raid_t *raidPtr)
2269 {
2270 daddr_t map_offset;
2271
2272 KASSERT(raidPtr->bytesPerSector);
2273 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2274 map_offset = raidPtr->bytesPerSector;
2275 else
2276 map_offset = RF_COMPONENT_INFO_SIZE;
2277 map_offset += rf_component_info_offset();
2278
2279 return map_offset;
2280 }
2281
2282 static daddr_t
2283 rf_parity_map_size(RF_Raid_t *raidPtr)
2284 {
2285 daddr_t map_size;
2286
2287 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2288 map_size = raidPtr->bytesPerSector;
2289 else
2290 map_size = RF_PARITY_MAP_SIZE;
2291
2292 return map_size;
2293 }
2294
2295 int
2296 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2297 {
2298 RF_ComponentLabel_t *clabel;
2299
2300 clabel = raidget_component_label(raidPtr, col);
2301 clabel->clean = RF_RAID_CLEAN;
2302 raidflush_component_label(raidPtr, col);
2303 return(0);
2304 }
2305
2306
2307 int
2308 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2309 {
2310 RF_ComponentLabel_t *clabel;
2311
2312 clabel = raidget_component_label(raidPtr, col);
2313 clabel->clean = RF_RAID_DIRTY;
2314 raidflush_component_label(raidPtr, col);
2315 return(0);
2316 }
2317
2318 int
2319 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2320 {
2321 KASSERT(raidPtr->bytesPerSector);
2322
2323 return raidread_component_label(raidPtr->bytesPerSector,
2324 raidPtr->Disks[col].dev,
2325 raidPtr->raid_cinfo[col].ci_vp,
2326 &raidPtr->raid_cinfo[col].ci_label);
2327 }
2328
2329 RF_ComponentLabel_t *
2330 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2331 {
2332 return &raidPtr->raid_cinfo[col].ci_label;
2333 }
2334
2335 int
2336 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2337 {
2338 RF_ComponentLabel_t *label;
2339
2340 label = &raidPtr->raid_cinfo[col].ci_label;
2341 label->mod_counter = raidPtr->mod_counter;
2342 #ifndef RF_NO_PARITY_MAP
2343 label->parity_map_modcount = label->mod_counter;
2344 #endif
2345 return raidwrite_component_label(raidPtr->bytesPerSector,
2346 raidPtr->Disks[col].dev,
2347 raidPtr->raid_cinfo[col].ci_vp, label);
2348 }
2349
2350 /*
2351 * Swap the label endianness.
2352 *
2353  * Everything in the component label is 4-byte-swapped except the version,
2354  * which is kept byte-swapped at all times and tells the writer that a
2355  * swap is necessary.
2356  *
2357  * For reads it is expected that out_label == clabel, but writes expect
2358  * separate labels, so only the re-swapped label is written out to disk,
2359  * leaving the in-core copy swapped except for the version.
2360 *
2361 * Only support swapping label version 2.
2362 */
2363 static void
2364 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
2365 {
2366 int *in, *out, *in_last;
2367
2368 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
2369
2370 /* Don't swap the label, but do copy it. */
2371 out_label->version = clabel->version;
2372
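	/* 4-byte-swap every word from serial_number through the tail of
	 * the label (future_use2). */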
2373 in = &clabel->serial_number;
2374 in_last = &clabel->future_use2[42];
2375 out = &out_label->serial_number;
2376
2377 for (; in < in_last; in++, out++)
2378 *out = bswap32(*in);
2379 }
2380
2381 static int
2382 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2383 RF_ComponentLabel_t *clabel)
2384 {
2385 int error;
2386
2387 error = raidread_component_area(dev, b_vp, clabel,
2388 sizeof(RF_ComponentLabel_t),
2389 rf_component_info_offset(),
2390 rf_component_info_size(secsize));
2391
2392 if (error == 0 &&
2393 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2394 rf_swap_label(clabel, clabel);
2395 }
2396
2397 return error;
2398 }
2399
2400 /* ARGSUSED */
2401 static int
2402 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2403 size_t msize, daddr_t offset, daddr_t dsize)
2404 {
2405 struct buf *bp;
2406 int error;
2407
2408 /* XXX should probably ensure that we don't try to do this if
2409 someone has changed rf_protected_sectors. */
2410
2411 if (b_vp == NULL) {
2412 /* For whatever reason, this component is not valid.
2413 Don't try to read a component label from it. */
2414 return(EINVAL);
2415 }
2416
2417 /* get a block of the appropriate size... */
2418 bp = geteblk((int)dsize);
2419 bp->b_dev = dev;
2420
2421 /* get our ducks in a row for the read */
2422 bp->b_blkno = offset / DEV_BSIZE;
2423 bp->b_bcount = dsize;
2424 bp->b_flags |= B_READ;
2425 bp->b_resid = dsize;
2426
2427 bdev_strategy(bp);
2428 error = biowait(bp);
2429
2430 if (!error) {
2431 memcpy(data, bp->b_data, msize);
2432 }
2433
2434 brelse(bp, 0);
2435 return(error);
2436 }
2437
2438 static int
2439 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2440 RF_ComponentLabel_t *clabel)
2441 {
2442 RF_ComponentLabel_t *clabel_write = clabel;
2443 RF_ComponentLabel_t lclabel;
2444 int error;
2445
2446 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2447 clabel_write = &lclabel;
2448 rf_swap_label(clabel, clabel_write);
2449 }
2450 error = raidwrite_component_area(dev, b_vp, clabel_write,
2451 sizeof(RF_ComponentLabel_t),
2452 rf_component_info_offset(),
2453 rf_component_info_size(secsize));
2454
2455 return error;
2456 }
2457
2458 /* ARGSUSED */
2459 static int
2460 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2461 size_t msize, daddr_t offset, daddr_t dsize)
2462 {
2463 struct buf *bp;
2464 int error;
2465
2466 /* get a block of the appropriate size... */
2467 bp = geteblk((int)dsize);
2468 bp->b_dev = dev;
2469
2470 /* get our ducks in a row for the write */
2471 bp->b_blkno = offset / DEV_BSIZE;
2472 bp->b_bcount = dsize;
2473 bp->b_flags |= B_WRITE;
2474 bp->b_resid = dsize;
2475
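	/* Zero the whole block, then copy in the (possibly smaller) data. */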
2476 memset(bp->b_data, 0, dsize);
2477 memcpy(bp->b_data, data, msize);
2478
2479 bdev_strategy(bp);
2480 error = biowait(bp);
2481 brelse(bp, 0);
2482 if (error) {
2483 #if 1
2484 printf("Failed to write RAID component info!\n");
2485 #endif
2486 }
2487
2488 return(error);
2489 }
2490
2491 void
2492 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2493 {
2494 int c;
2495
2496 for (c = 0; c < raidPtr->numCol; c++) {
2497 /* Skip dead disks. */
2498 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2499 continue;
2500 /* XXXjld: what if an error occurs here? */
2501 raidwrite_component_area(raidPtr->Disks[c].dev,
2502 raidPtr->raid_cinfo[c].ci_vp, map,
2503 RF_PARITYMAP_NBYTE,
2504 rf_parity_map_offset(raidPtr),
2505 rf_parity_map_size(raidPtr));
2506 }
2507 }
2508
2509 void
2510 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2511 {
2512 struct rf_paritymap_ondisk tmp;
2513 int c,first;
2514
2515 first=1;
2516 for (c = 0; c < raidPtr->numCol; c++) {
2517 /* Skip dead disks. */
2518 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2519 continue;
2520 raidread_component_area(raidPtr->Disks[c].dev,
2521 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2522 RF_PARITYMAP_NBYTE,
2523 rf_parity_map_offset(raidPtr),
2524 rf_parity_map_size(raidPtr));
2525 if (first) {
2526 memcpy(map, &tmp, sizeof(*map));
2527 first = 0;
2528 } else {
2529 rf_paritymap_merge(map, &tmp);
2530 }
2531 }
2532 }
2533
2534 void
2535 rf_markalldirty(RF_Raid_t *raidPtr)
2536 {
2537 RF_ComponentLabel_t *clabel;
2538 int sparecol;
2539 int c;
2540 int j;
2541 int scol = -1;
2542
2543 raidPtr->mod_counter++;
2544 for (c = 0; c < raidPtr->numCol; c++) {
2545 /* we don't want to touch (at all) a disk that has
2546 failed */
2547 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2548 clabel = raidget_component_label(raidPtr, c);
2549 if (clabel->status == rf_ds_spared) {
2550 /* XXX do something special...
2551 but whatever you do, don't
2552 try to access it!! */
2553 } else {
2554 raidmarkdirty(raidPtr, c);
2555 }
2556 }
2557 }
2558
2559 for( c = 0; c < raidPtr->numSpare ; c++) {
2560 sparecol = raidPtr->numCol + c;
2561 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2562 			/*
2563 			 * We claim this disk is "optimal" if it's
2564 			 * rf_ds_used_spare, as that means it should be
2565 			 * directly substitutable for the disk it replaced.
2566 			 * We note that too...
2567 			 */
2570
2571 for(j=0;j<raidPtr->numCol;j++) {
2572 if (raidPtr->Disks[j].spareCol == sparecol) {
2573 scol = j;
2574 break;
2575 }
2576 }
2577
2578 clabel = raidget_component_label(raidPtr, sparecol);
2579 /* make sure status is noted */
2580
2581 raid_init_component_label(raidPtr, clabel);
2582
2583 clabel->row = 0;
2584 clabel->column = scol;
2585 /* Note: we *don't* change status from rf_ds_used_spare
2586 to rf_ds_optimal */
2587 /* clabel.status = rf_ds_optimal; */
2588
2589 raidmarkdirty(raidPtr, sparecol);
2590 }
2591 }
2592 }
2593
2594
2595 void
2596 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2597 {
2598 RF_ComponentLabel_t *clabel;
2599 int sparecol;
2600 int c;
2601 int j;
2602 int scol;
2603 struct raid_softc *rs = raidPtr->softc;
2604
2605 scol = -1;
2606
2607 /* XXX should do extra checks to make sure things really are clean,
2608 rather than blindly setting the clean bit... */
2609
2610 raidPtr->mod_counter++;
2611
2612 for (c = 0; c < raidPtr->numCol; c++) {
2613 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2614 clabel = raidget_component_label(raidPtr, c);
2615 /* make sure status is noted */
2616 clabel->status = rf_ds_optimal;
2617
2618 /* note what unit we are configured as */
2619 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2620 clabel->last_unit = raidPtr->raidid;
2621
2622 raidflush_component_label(raidPtr, c);
2623 if (final == RF_FINAL_COMPONENT_UPDATE) {
2624 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2625 raidmarkclean(raidPtr, c);
2626 }
2627 }
2628 }
2629 /* else we don't touch it.. */
2630 }
2631
2632 for( c = 0; c < raidPtr->numSpare ; c++) {
2633 sparecol = raidPtr->numCol + c;
2634 /* Need to ensure that the reconstruct actually completed! */
2635 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2636 			/*
2637 			 * We claim this disk is "optimal" if it's
2638 			 * rf_ds_used_spare, as that means it should be
2639 			 * directly substitutable for the disk it replaced.
2640 			 * We note that too...
2641 			 */
2644
2645 for(j=0;j<raidPtr->numCol;j++) {
2646 if (raidPtr->Disks[j].spareCol == sparecol) {
2647 scol = j;
2648 break;
2649 }
2650 }
2651
2652 /* XXX shouldn't *really* need this... */
2653 clabel = raidget_component_label(raidPtr, sparecol);
2654 /* make sure status is noted */
2655
2656 raid_init_component_label(raidPtr, clabel);
2657
2658 clabel->column = scol;
2659 clabel->status = rf_ds_optimal;
2660 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2661 clabel->last_unit = raidPtr->raidid;
2662
2663 raidflush_component_label(raidPtr, sparecol);
2664 if (final == RF_FINAL_COMPONENT_UPDATE) {
2665 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2666 raidmarkclean(raidPtr, sparecol);
2667 }
2668 }
2669 }
2670 }
2671 }
2672
2673 void
2674 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2675 {
2676
2677 if (vp != NULL) {
2678 if (auto_configured == 1) {
2679 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2680 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2681 vput(vp);
2682
2683 } else {
2684 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2685 }
2686 }
2687 }
2688
2689
2690 void
2691 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2692 {
2693 int r,c;
2694 struct vnode *vp;
2695 int acd;
2696
2697
2698 /* We take this opportunity to close the vnodes like we should.. */
2699
2700 for (c = 0; c < raidPtr->numCol; c++) {
2701 vp = raidPtr->raid_cinfo[c].ci_vp;
2702 acd = raidPtr->Disks[c].auto_configured;
2703 rf_close_component(raidPtr, vp, acd);
2704 raidPtr->raid_cinfo[c].ci_vp = NULL;
2705 raidPtr->Disks[c].auto_configured = 0;
2706 }
2707
2708 for (r = 0; r < raidPtr->numSpare; r++) {
2709 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2710 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2711 rf_close_component(raidPtr, vp, acd);
2712 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2713 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2714 }
2715 }
2716
2717
2718 static void
2719 rf_ReconThread(struct rf_recon_req_internal *req)
2720 {
2721 int s;
2722 RF_Raid_t *raidPtr;
2723
2724 s = splbio();
2725 raidPtr = (RF_Raid_t *) req->raidPtr;
2726 raidPtr->recon_in_progress = 1;
2727
2728 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2729 raidPtr->forceRecon = 1;
2730 }
2731
2732 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2733 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2734
2735 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2736 raidPtr->forceRecon = 0;
2737 }
2738
2739 RF_Free(req, sizeof(*req));
2740
2741 raidPtr->recon_in_progress = 0;
2742 splx(s);
2743
2744 /* That's all... */
2745 kthread_exit(0); /* does not return */
2746 }
2747
2748 static void
2749 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2750 {
2751 int retcode;
2752 int s;
2753
2754 raidPtr->parity_rewrite_stripes_done = 0;
2755 raidPtr->parity_rewrite_in_progress = 1;
2756 s = splbio();
2757 retcode = rf_RewriteParity(raidPtr);
2758 splx(s);
2759 if (retcode) {
2760 printf("raid%d: Error re-writing parity (%d)!\n",
2761 raidPtr->raidid, retcode);
2762 } else {
2763 /* set the clean bit! If we shutdown correctly,
2764 the clean bit on each component label will get
2765 set */
2766 raidPtr->parity_good = RF_RAID_CLEAN;
2767 }
2768 raidPtr->parity_rewrite_in_progress = 0;
2769
2770 /* Anyone waiting for us to stop? If so, inform them... */
2771 if (raidPtr->waitShutdown) {
2772 rf_lock_mutex2(raidPtr->rad_lock);
2773 cv_broadcast(&raidPtr->parity_rewrite_cv);
2774 rf_unlock_mutex2(raidPtr->rad_lock);
2775 }
2776
2777 /* That's all... */
2778 kthread_exit(0); /* does not return */
2779 }
2780
2781
2782 static void
2783 rf_CopybackThread(RF_Raid_t *raidPtr)
2784 {
2785 int s;
2786
2787 raidPtr->copyback_in_progress = 1;
2788 s = splbio();
2789 rf_CopybackReconstructedData(raidPtr);
2790 splx(s);
2791 raidPtr->copyback_in_progress = 0;
2792
2793 /* That's all... */
2794 kthread_exit(0); /* does not return */
2795 }
2796
2797
2798 static void
2799 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2800 {
2801 int s;
2802 RF_Raid_t *raidPtr;
2803
2804 s = splbio();
2805 raidPtr = req->raidPtr;
2806 raidPtr->recon_in_progress = 1;
2807
2808 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2809 raidPtr->forceRecon = 1;
2810 }
2811
2812 rf_ReconstructInPlace(raidPtr, req->col);
2813
2814 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2815 raidPtr->forceRecon = 0;
2816 }
2817
2818 RF_Free(req, sizeof(*req));
2819 raidPtr->recon_in_progress = 0;
2820 splx(s);
2821
2822 /* That's all... */
2823 kthread_exit(0); /* does not return */
2824 }
2825
2826 static RF_AutoConfig_t *
2827 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2828 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2829 unsigned secsize)
2830 {
2831 int good_one = 0;
2832 RF_ComponentLabel_t *clabel;
2833 RF_AutoConfig_t *ac;
2834
2835 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2836
2837 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2838 /* Got the label. Does it look reasonable? */
2839 if (rf_reasonable_label(clabel, numsecs) &&
2840 (rf_component_label_partitionsize(clabel) <= size)) {
2841 #ifdef DEBUG
2842 printf("Component on: %s: %llu\n",
2843 cname, (unsigned long long)size);
2844 rf_print_component_label(clabel);
2845 #endif
2846 /* if it's reasonable, add it, else ignore it. */
2847 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2848 M_WAITOK);
2849 strlcpy(ac->devname, cname, sizeof(ac->devname));
2850 ac->dev = dev;
2851 ac->vp = vp;
2852 ac->clabel = clabel;
2853 ac->next = ac_list;
2854 ac_list = ac;
2855 good_one = 1;
2856 }
2857 }
2858 if (!good_one) {
2859 /* cleanup */
2860 free(clabel, M_RAIDFRAME);
2861 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2862 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2863 vput(vp);
2864 }
2865 return ac_list;
2866 }
2867
2868 static RF_AutoConfig_t *
2869 rf_find_raid_components(void)
2870 {
2871 struct vnode *vp;
2872 struct disklabel label;
2873 device_t dv;
2874 deviter_t di;
2875 dev_t dev;
2876 int bmajor, bminor, wedge, rf_part_found;
2877 int error;
2878 int i;
2879 RF_AutoConfig_t *ac_list;
2880 uint64_t numsecs;
2881 unsigned secsize;
2882 int dowedges;
2883
2884 /* initialize the AutoConfig list */
2885 ac_list = NULL;
2886
2887 	/*
2888 	 * We begin by trolling through *all* the devices on the system *twice*:
2889 	 * first we scan for wedges, then for other devices.  This avoids
2890 	 * using a raw partition instead of a wedge that covers the whole disk.
2891 	 */
2892
2893 for (dowedges=1; dowedges>=0; --dowedges) {
2894 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2895 dv = deviter_next(&di)) {
2896
2897 /* we are only interested in disks */
2898 if (device_class(dv) != DV_DISK)
2899 continue;
2900
2901 /* we don't care about floppies */
2902 if (device_is_a(dv, "fd")) {
2903 continue;
2904 }
2905
2906 /* we don't care about CDs. */
2907 if (device_is_a(dv, "cd")) {
2908 continue;
2909 }
2910
2911 /* we don't care about md. */
2912 if (device_is_a(dv, "md")) {
2913 continue;
2914 }
2915
2916 /* hdfd is the Atari/Hades floppy driver */
2917 if (device_is_a(dv, "hdfd")) {
2918 continue;
2919 }
2920
2921 /* fdisa is the Atari/Milan floppy driver */
2922 if (device_is_a(dv, "fdisa")) {
2923 continue;
2924 }
2925
2926 /* we don't care about spiflash */
2927 if (device_is_a(dv, "spiflash")) {
2928 continue;
2929 }
2930
2931 /* are we in the wedges pass ? */
2932 wedge = device_is_a(dv, "dk");
2933 if (wedge != dowedges) {
2934 continue;
2935 }
2936
2937 /* need to find the device_name_to_block_device_major stuff */
2938 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2939
2940 			rf_part_found = 0; /* No raid partition as yet */
2941
2942 /* get a vnode for the raw partition of this disk */
2943 bminor = minor(device_unit(dv));
2944 dev = wedge ? makedev(bmajor, bminor) :
2945 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2946 if (bdevvp(dev, &vp))
2947 panic("RAID can't alloc vnode");
2948
2949 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2950 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2951
2952 if (error) {
2953 /* "Who cares." Continue looking
2954 				   for something that exists */
2955 vput(vp);
2956 continue;
2957 }
2958
2959 VOP_UNLOCK(vp);
2960 error = getdisksize(vp, &numsecs, &secsize);
2961 if (error) {
2962 /*
2963 * Pseudo devices like vnd and cgd can be
2964 * opened but may still need some configuration.
2965 * Ignore these quietly.
2966 */
2967 if (error != ENXIO)
2968 printf("RAIDframe: can't get disk size"
2969 " for dev %s (%d)\n",
2970 device_xname(dv), error);
2971 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2972 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2973 vput(vp);
2974 continue;
2975 }
2976 if (wedge) {
2977 struct dkwedge_info dkw;
2978 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2979 NOCRED);
2980 if (error) {
2981 printf("RAIDframe: can't get wedge info for "
2982 "dev %s (%d)\n", device_xname(dv), error);
2983 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2984 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2985 vput(vp);
2986 continue;
2987 }
2988
2989 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2990 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2991 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2992 vput(vp);
2993 continue;
2994 }
2995
2996 ac_list = rf_get_component(ac_list, dev, vp,
2997 device_xname(dv), dkw.dkw_size, numsecs, secsize);
2998 				rf_part_found = 1; /* There is a raid component on this disk */
2999 continue;
3000 }
3001
3002 /* Ok, the disk exists. Go get the disklabel. */
3003 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3004 if (error) {
3005 /*
3006 * XXX can't happen - open() would
3007 * have errored out (or faked up one)
3008 */
3009 if (error != ENOTTY)
3010 printf("RAIDframe: can't get label for dev "
3011 "%s (%d)\n", device_xname(dv), error);
3012 }
3013
3014 /* don't need this any more. We'll allocate it again
3015 a little later if we really do... */
3016 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3017 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3018 vput(vp);
3019
3020 if (error)
3021 continue;
3022
3023 			rf_part_found = 0; /* No raid partitions yet */
3024 for (i = 0; i < label.d_npartitions; i++) {
3025 char cname[sizeof(ac_list->devname)];
3026
3027 /* We only support partitions marked as RAID */
3028 if (label.d_partitions[i].p_fstype != FS_RAID)
3029 continue;
3030
3031 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3032 if (bdevvp(dev, &vp))
3033 panic("RAID can't alloc vnode");
3034
3035 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3036 error = VOP_OPEN(vp, FREAD, NOCRED);
3037 if (error) {
3038 /* Not quite a 'whatever'. In
3039 * this situation we know
3040 * there is a FS_RAID
3041 * partition, but we can't
3042 * open it. The most likely
3043 * reason is that the
3044 * partition is already in
3045 * use by another RAID set.
3046 * So note that we've already
3047 * found a partition on this
3048 * disk so we don't attempt
3049 * to use the raw disk later. */
3050 rf_part_found = 1;
3051 vput(vp);
3052 continue;
3053 }
3054 VOP_UNLOCK(vp);
3055 snprintf(cname, sizeof(cname), "%s%c",
3056 device_xname(dv), 'a' + i);
3057 ac_list = rf_get_component(ac_list, dev, vp, cname,
3058 label.d_partitions[i].p_size, numsecs, secsize);
3059 				rf_part_found = 1; /* There is at least one raid partition on this disk */
3060 }
3061
3062 			/*
3063 			 * If there is no raid component on this disk, either in a
3064 			 * disklabel or inside a wedge, check the raw partition as well,
3065 			 * as it is possible to configure raid components on raw disk
3066 			 * devices.
3067 			 */
3068
3069 if (!rf_part_found) {
3070 char cname[sizeof(ac_list->devname)];
3071
3072 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3073 if (bdevvp(dev, &vp))
3074 panic("RAID can't alloc vnode");
3075
3076 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3077
3078 error = VOP_OPEN(vp, FREAD, NOCRED);
3079 if (error) {
3080 /* Whatever... */
3081 vput(vp);
3082 continue;
3083 }
3084 VOP_UNLOCK(vp);
3085 snprintf(cname, sizeof(cname), "%s%c",
3086 device_xname(dv), 'a' + RAW_PART);
3087 ac_list = rf_get_component(ac_list, dev, vp, cname,
3088 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3089 }
3090 }
3091 deviter_release(&di);
3092 }
3093 return ac_list;
3094 }
3095
3096 int
3097 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3098 {
3099
3100 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
3101 clabel->version==RF_COMPONENT_LABEL_VERSION ||
3102 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
3103 (clabel->clean == RF_RAID_CLEAN ||
3104 clabel->clean == RF_RAID_DIRTY) &&
3105 clabel->row >=0 &&
3106 clabel->column >= 0 &&
3107 clabel->num_rows > 0 &&
3108 clabel->num_columns > 0 &&
3109 clabel->row < clabel->num_rows &&
3110 clabel->column < clabel->num_columns &&
3111 clabel->blockSize > 0 &&
3112 /*
3113 * numBlocksHi may contain garbage, but it is ok since
3114 * the type is unsigned. If it is really garbage,
3115 * rf_fix_old_label_size() will fix it.
3116 */
3117 rf_component_label_numblocks(clabel) > 0) {
3118 /*
3119 * label looks reasonable enough...
3120 * let's make sure it has no old garbage.
3121 */
3122 if (numsecs)
3123 rf_fix_old_label_size(clabel, numsecs);
3124 return(1);
3125 }
3126 return(0);
3127 }
3128
3129
3130 /*
3131 * For reasons yet unknown, some old component labels have garbage in
3132 * the newer numBlocksHi region, and this causes lossage. Since those
3133 * disks will also have numsecs set to less than 32 bits of sectors,
3134 * we can determine when this corruption has occurred, and fix it.
3135 *
3136 * The exact same problem, with the same unknown reason, happens to
3137 * the partitionSizeHi member as well.
3138 */
3139 static void
3140 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3141 {
3142
3143 if (numsecs < ((uint64_t)1 << 32)) {
3144 if (clabel->numBlocksHi) {
3145 printf("WARNING: total sectors < 32 bits, yet "
3146 "numBlocksHi set\n"
3147 "WARNING: resetting numBlocksHi to zero.\n");
3148 clabel->numBlocksHi = 0;
3149 }
3150
3151 if (clabel->partitionSizeHi) {
3152 printf("WARNING: total sectors < 32 bits, yet "
3153 "partitionSizeHi set\n"
3154 "WARNING: resetting partitionSizeHi to zero.\n");
3155 clabel->partitionSizeHi = 0;
3156 }
3157 }
3158 }
3159
3160
3161 #ifdef DEBUG
3162 void
3163 rf_print_component_label(RF_ComponentLabel_t *clabel)
3164 {
3165 uint64_t numBlocks;
3166 static const char *rp[] = {
3167 "No", "Force", "Soft", "*invalid*"
3168 };
3169
3170
3171 numBlocks = rf_component_label_numblocks(clabel);
3172
3173 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3174 clabel->row, clabel->column,
3175 clabel->num_rows, clabel->num_columns);
3176 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3177 clabel->version, clabel->serial_number,
3178 clabel->mod_counter);
3179 printf(" Clean: %s Status: %d\n",
3180 clabel->clean ? "Yes" : "No", clabel->status);
3181 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3182 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3183 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3184 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3185 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3186 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3187 printf(" Last configured as: raid%d\n", clabel->last_unit);
3188 #if 0
3189 printf(" Config order: %d\n", clabel->config_order);
3190 #endif
3191
3192 }
3193 #endif
3194
3195 static RF_ConfigSet_t *
3196 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3197 {
3198 RF_AutoConfig_t *ac;
3199 RF_ConfigSet_t *config_sets;
3200 RF_ConfigSet_t *cset;
3201 RF_AutoConfig_t *ac_next;
3202
3203
3204 config_sets = NULL;
3205
3206 /* Go through the AutoConfig list, and figure out which components
3207 belong to what sets. */
3208 ac = ac_list;
3209 while(ac!=NULL) {
3210 /* we're going to putz with ac->next, so save it here
3211 for use at the end of the loop */
3212 ac_next = ac->next;
3213
3214 if (config_sets == NULL) {
3215 /* will need at least this one... */
3216 config_sets = malloc(sizeof(RF_ConfigSet_t),
3217 M_RAIDFRAME, M_WAITOK);
3218 /* this one is easy :) */
3219 config_sets->ac = ac;
3220 config_sets->next = NULL;
3221 config_sets->rootable = 0;
3222 ac->next = NULL;
3223 } else {
3224 /* which set does this component fit into? */
3225 cset = config_sets;
3226 while(cset!=NULL) {
3227 if (rf_does_it_fit(cset, ac)) {
3228 /* looks like it matches... */
3229 ac->next = cset->ac;
3230 cset->ac = ac;
3231 break;
3232 }
3233 cset = cset->next;
3234 }
3235 if (cset==NULL) {
3236 /* didn't find a match above... new set..*/
3237 cset = malloc(sizeof(RF_ConfigSet_t),
3238 M_RAIDFRAME, M_WAITOK);
3239 cset->ac = ac;
3240 ac->next = NULL;
3241 cset->next = config_sets;
3242 cset->rootable = 0;
3243 config_sets = cset;
3244 }
3245 }
3246 ac = ac_next;
3247 }
3248
3249
3250 return(config_sets);
3251 }
3252
3253 static int
3254 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3255 {
3256 RF_ComponentLabel_t *clabel1, *clabel2;
3257
3258 /* If this one matches the *first* one in the set, that's good
3259 enough, since the other members of the set would have been
3260 through here too... */
3261 /* note that we are not checking partitionSize here..
3262
3263 Note that we are also not checking the mod_counters here.
3264 If everything else matches except the mod_counter, that's
3265 good enough for this test. We will deal with the mod_counters
3266 a little later in the autoconfiguration process.
3267
3268 (clabel1->mod_counter == clabel2->mod_counter) &&
3269
3270 The reason we don't check for this is that failed disks
3271 will have lower modification counts. If those disks are
3272 not added to the set they used to belong to, then they will
3273 form their own set, which may result in 2 different sets,
3274 for example, competing to be configured at raid0, and
3275 perhaps competing to be the root filesystem set. If the
3276 wrong ones get configured, or both attempt to become /,
3277 	   weird behaviour and/or serious lossage will occur.  Thus we
3278 need to bring them into the fold here, and kick them out at
3279 a later point.
3280
3281 */
3282
3283 clabel1 = cset->ac->clabel;
3284 clabel2 = ac->clabel;
3285 if ((clabel1->version == clabel2->version) &&
3286 (clabel1->serial_number == clabel2->serial_number) &&
3287 (clabel1->num_rows == clabel2->num_rows) &&
3288 (clabel1->num_columns == clabel2->num_columns) &&
3289 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3290 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3291 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3292 (clabel1->parityConfig == clabel2->parityConfig) &&
3293 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3294 (clabel1->blockSize == clabel2->blockSize) &&
3295 rf_component_label_numblocks(clabel1) ==
3296 rf_component_label_numblocks(clabel2) &&
3297 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3298 (clabel1->root_partition == clabel2->root_partition) &&
3299 (clabel1->last_unit == clabel2->last_unit) &&
3300 (clabel1->config_order == clabel2->config_order)) {
3301 		/* if it gets here, it almost *has* to be a match */
3302 } else {
3303 /* it's not consistent with somebody in the set..
3304 punt */
3305 return(0);
3306 }
3307 /* all was fine.. it must fit... */
3308 return(1);
3309 }
3310
3311 static int
3312 rf_have_enough_components(RF_ConfigSet_t *cset)
3313 {
3314 RF_AutoConfig_t *ac;
3315 RF_AutoConfig_t *auto_config;
3316 RF_ComponentLabel_t *clabel;
3317 int c;
3318 int num_cols;
3319 int num_missing;
3320 int mod_counter;
3321 int mod_counter_found;
3322 int even_pair_failed;
3323 char parity_type;
3324
3325
3326 /* check to see that we have enough 'live' components
3327 of this set. If so, we can configure it if necessary */
3328
3329 num_cols = cset->ac->clabel->num_columns;
3330 parity_type = cset->ac->clabel->parityConfig;
3331
3332 /* XXX Check for duplicate components!?!?!? */
3333
3334 /* Determine what the mod_counter is supposed to be for this set. */
3335
3336 mod_counter_found = 0;
3337 mod_counter = 0;
3338 ac = cset->ac;
3339 while(ac!=NULL) {
3340 if (mod_counter_found==0) {
3341 mod_counter = ac->clabel->mod_counter;
3342 mod_counter_found = 1;
3343 } else {
3344 if (ac->clabel->mod_counter > mod_counter) {
3345 mod_counter = ac->clabel->mod_counter;
3346 }
3347 }
3348 ac = ac->next;
3349 }
3350
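	/* Now walk each column looking for a component that carries the
	 * winning mod_counter; anything else is counted as missing. */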
3351 num_missing = 0;
3352 auto_config = cset->ac;
3353
3354 even_pair_failed = 0;
3355 for(c=0; c<num_cols; c++) {
3356 ac = auto_config;
3357 while(ac!=NULL) {
3358 if ((ac->clabel->column == c) &&
3359 (ac->clabel->mod_counter == mod_counter)) {
3360 /* it's this one... */
3361 #ifdef DEBUG
3362 printf("Found: %s at %d\n",
3363 ac->devname,c);
3364 #endif
3365 break;
3366 }
3367 ac=ac->next;
3368 }
3369 if (ac==NULL) {
3370 /* Didn't find one here! */
3371 /* special case for RAID 1, especially
3372 where there are more than 2
3373 components (where RAIDframe treats
3374 things a little differently :( ) */
3375 if (parity_type == '1') {
3376 if (c%2 == 0) { /* even component */
3377 even_pair_failed = 1;
3378 } else { /* odd component. If
3379 we're failed, and
3380 so is the even
3381 component, it's
3382 "Good Night, Charlie" */
3383 if (even_pair_failed == 1) {
3384 return(0);
3385 }
3386 }
3387 } else {
3388 /* normal accounting */
3389 num_missing++;
3390 }
3391 }
3392 if ((parity_type == '1') && (c%2 == 1)) {
3393 /* Just did an even component, and we didn't
3394 bail.. reset the even_pair_failed flag,
3395 and go on to the next component.... */
3396 even_pair_failed = 0;
3397 }
3398 }
3399
3400 clabel = cset->ac->clabel;
3401
3402 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3403 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3404 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3405 /* XXX this needs to be made *much* more general */
3406 /* Too many failures */
3407 return(0);
3408 }
3409 /* otherwise, all is well, and we've got enough to take a kick
3410 at autoconfiguring this set */
3411 return(1);
3412 }
3413
3414 static void
3415 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3416 RF_Raid_t *raidPtr)
3417 {
3418 RF_ComponentLabel_t *clabel;
3419 int i;
3420
3421 clabel = ac->clabel;
3422
3423 /* 1. Fill in the common stuff */
3424 config->numCol = clabel->num_columns;
3425 config->numSpare = 0; /* XXX should this be set here? */
3426 config->sectPerSU = clabel->sectPerSU;
3427 config->SUsPerPU = clabel->SUsPerPU;
3428 config->SUsPerRU = clabel->SUsPerRU;
3429 config->parityConfig = clabel->parityConfig;
3430 /* XXX... */
3431 strcpy(config->diskQueueType,"fifo");
3432 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3433 config->layoutSpecificSize = 0; /* XXX ?? */
3434
3435 while(ac!=NULL) {
3436 		/* row/col values will be in range due to the checks
3437 		   in rf_reasonable_label() */
3438 strcpy(config->devnames[0][ac->clabel->column],
3439 ac->devname);
3440 ac = ac->next;
3441 }
3442
3443 for(i=0;i<RF_MAXDBGV;i++) {
3444 config->debugVars[i][0] = 0;
3445 }
3446 }
3447
3448 static int
3449 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3450 {
3451 RF_ComponentLabel_t *clabel;
3452 int column;
3453 int sparecol;
3454
3455 raidPtr->autoconfigure = new_value;
3456
3457 for(column=0; column<raidPtr->numCol; column++) {
3458 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3459 clabel = raidget_component_label(raidPtr, column);
3460 clabel->autoconfigure = new_value;
3461 raidflush_component_label(raidPtr, column);
3462 }
3463 }
3464 for(column = 0; column < raidPtr->numSpare ; column++) {
3465 sparecol = raidPtr->numCol + column;
3466 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3467 clabel = raidget_component_label(raidPtr, sparecol);
3468 clabel->autoconfigure = new_value;
3469 raidflush_component_label(raidPtr, sparecol);
3470 }
3471 }
3472 return(new_value);
3473 }
3474
3475 static int
3476 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3477 {
3478 RF_ComponentLabel_t *clabel;
3479 int column;
3480 int sparecol;
3481
3482 raidPtr->root_partition = new_value;
3483 for(column=0; column<raidPtr->numCol; column++) {
3484 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3485 clabel = raidget_component_label(raidPtr, column);
3486 clabel->root_partition = new_value;
3487 raidflush_component_label(raidPtr, column);
3488 }
3489 }
3490 for(column = 0; column < raidPtr->numSpare ; column++) {
3491 sparecol = raidPtr->numCol + column;
3492 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3493 clabel = raidget_component_label(raidPtr, sparecol);
3494 clabel->root_partition = new_value;
3495 raidflush_component_label(raidPtr, sparecol);
3496 }
3497 }
3498 return(new_value);
3499 }
3500
3501 static void
3502 rf_release_all_vps(RF_ConfigSet_t *cset)
3503 {
3504 RF_AutoConfig_t *ac;
3505
3506 ac = cset->ac;
3507 while(ac!=NULL) {
3508 /* Close the vp, and give it back */
3509 if (ac->vp) {
3510 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3511 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3512 vput(ac->vp);
3513 ac->vp = NULL;
3514 }
3515 ac = ac->next;
3516 }
3517 }
3518
3519
3520 static void
3521 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3522 {
3523 RF_AutoConfig_t *ac;
3524 RF_AutoConfig_t *next_ac;
3525
3526 ac = cset->ac;
3527 while(ac!=NULL) {
3528 next_ac = ac->next;
3529 /* nuke the label */
3530 free(ac->clabel, M_RAIDFRAME);
3531 /* cleanup the config structure */
3532 free(ac, M_RAIDFRAME);
3533 /* "next.." */
3534 ac = next_ac;
3535 }
3536 /* and, finally, nuke the config set */
3537 free(cset, M_RAIDFRAME);
3538 }
3539
3540
3541 void
3542 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3543 {
3544 /* avoid over-writing byteswapped version. */
3545 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
3546 clabel->version = RF_COMPONENT_LABEL_VERSION;
3547 clabel->serial_number = raidPtr->serial_number;
3548 clabel->mod_counter = raidPtr->mod_counter;
3549
3550 clabel->num_rows = 1;
3551 clabel->num_columns = raidPtr->numCol;
3552 clabel->clean = RF_RAID_DIRTY; /* not clean */
3553 clabel->status = rf_ds_optimal; /* "It's good!" */
3554
3555 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3556 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3557 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3558
3559 clabel->blockSize = raidPtr->bytesPerSector;
3560 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3561
3562 /* XXX not portable */
3563 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3564 clabel->maxOutstanding = raidPtr->maxOutstanding;
3565 clabel->autoconfigure = raidPtr->autoconfigure;
3566 clabel->root_partition = raidPtr->root_partition;
3567 clabel->last_unit = raidPtr->raidid;
3568 clabel->config_order = raidPtr->config_order;
3569
3570 #ifndef RF_NO_PARITY_MAP
3571 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3572 #endif
3573 }
3574
3575 static struct raid_softc *
3576 rf_auto_config_set(RF_ConfigSet_t *cset)
3577 {
3578 RF_Raid_t *raidPtr;
3579 RF_Config_t *config;
3580 int raidID;
3581 struct raid_softc *sc;
3582
3583 #ifdef DEBUG
3584 printf("RAID autoconfigure\n");
3585 #endif
3586
3587 /* 1. Create a config structure */
3588 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3589
3590 /*
3591 	   2. Figure out what RAID ID this one is supposed to live at.
3592 See if we can get the same RAID dev that it was configured
3593 on last time..
3594 */
3595
3596 raidID = cset->ac->clabel->last_unit;
3597 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3598 sc = raidget(++raidID, false))
3599 continue;
3600 #ifdef DEBUG
3601 printf("Configuring raid%d:\n",raidID);
3602 #endif
3603
3604 if (sc == NULL)
3605 sc = raidget(raidID, true);
3606 raidPtr = &sc->sc_r;
3607
3608 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3609 raidPtr->softc = sc;
3610 raidPtr->raidid = raidID;
3611 raidPtr->openings = RAIDOUTSTANDING;
3612
3613 /* 3. Build the configuration structure */
3614 rf_create_configuration(cset->ac, config, raidPtr);
3615
3616 /* 4. Do the configuration */
3617 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3618 raidinit(sc);
3619
3620 rf_markalldirty(raidPtr);
3621 raidPtr->autoconfigure = 1; /* XXX do this here? */
3622 switch (cset->ac->clabel->root_partition) {
3623 case 1: /* Force Root */
3624 case 2: /* Soft Root: root when boot partition part of raid */
3625 /*
3626 * everything configured just fine. Make a note
3627 * that this set is eligible to be root,
3628 * or forced to be root
3629 */
3630 cset->rootable = cset->ac->clabel->root_partition;
3631 /* XXX do this here? */
3632 raidPtr->root_partition = cset->rootable;
3633 break;
3634 default:
3635 break;
3636 }
3637 } else {
3638 raidput(sc);
3639 sc = NULL;
3640 }
3641
3642 /* 5. Cleanup */
3643 free(config, M_RAIDFRAME);
3644 return sc;
3645 }
3646
3647 void
3648 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3649 size_t xmin, size_t xmax)
3650 {
3651
3652 /* Format: raid%d_foo */
3653 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3654
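	/* Create the pool, set its high-water mark to xmax, and
	 * pre-allocate xmin items. */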
3655 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3656 pool_sethiwat(p, xmax);
3657 pool_prime(p, xmin);
3658 }
3659
3660
3661 /*
3662  * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
3663  * to see if there is I/O pending and if that I/O could possibly be done
3664  * for a given RAID set.  Returns 0 if I/O is waiting and can be done, 1
3665  * otherwise.
3666  */
3668 int
3669 rf_buf_queue_check(RF_Raid_t *raidPtr)
3670 {
3671 struct raid_softc *rs;
3672 struct dk_softc *dksc;
3673
3674 rs = raidPtr->softc;
3675 dksc = &rs->sc_dksc;
3676
3677 if ((rs->sc_flags & RAIDF_INITED) == 0)
3678 return 1;
3679
3680 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3681 /* there is work to do */
3682 return 0;
3683 }
3684 /* default is nothing to do */
3685 return 1;
3686 }
3687
3688 int
3689 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3690 {
3691 uint64_t numsecs;
3692 unsigned secsize;
3693 int error;
3694
3695 error = getdisksize(vp, &numsecs, &secsize);
3696 if (error == 0) {
3697 diskPtr->blockSize = secsize;
3698 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3699 diskPtr->partitionSize = numsecs;
3700 return 0;
3701 }
3702 return error;
3703 }
3704
3705 static int
3706 raid_match(device_t self, cfdata_t cfdata, void *aux)
3707 {
3708 return 1;
3709 }
3710
3711 static void
3712 raid_attach(device_t parent, device_t self, void *aux)
3713 {
3714 }
3715
3716
3717 static int
3718 raid_detach(device_t self, int flags)
3719 {
3720 int error;
3721 struct raid_softc *rs = raidsoftc(self);
3722
3723 if (rs == NULL)
3724 return ENXIO;
3725
3726 if ((error = raidlock(rs)) != 0)
3727 return error;
3728
3729 error = raid_detach_unlocked(rs);
3730
3731 raidunlock(rs);
3732
3733 /* XXX raid can be referenced here */
3734
3735 if (error)
3736 return error;
3737
3738 /* Free the softc */
3739 raidput(rs);
3740
3741 return 0;
3742 }
3743
3744 static void
3745 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3746 {
3747 struct dk_softc *dksc = &rs->sc_dksc;
3748 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3749
3750 memset(dg, 0, sizeof(*dg));
3751
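	/* Synthesize a nominal geometry from the RAID layout; the values
	 * need only be plausible, not physical. */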
3752 dg->dg_secperunit = raidPtr->totalSectors;
3753 dg->dg_secsize = raidPtr->bytesPerSector;
3754 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3755 dg->dg_ntracks = 4 * raidPtr->numCol;
3756
3757 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3758 }
3759
3760 /*
3761 * Get cache info for all the components (including spares).
3762 * Returns intersection of all the cache flags of all disks, or first
3763 * error if any encountered.
3764 * XXXfua feature flags can change as spares are added - lock down somehow
3765 */
3766 static int
3767 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3768 {
3769 int c;
3770 int error;
3771 int dkwhole = 0, dkpart;
3772
3773 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3774 /*
3775 * Check any non-dead disk, even when currently being
3776 * reconstructed.
3777 */
3778 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3779 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3780 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3781 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3782 if (error) {
3783 if (error != ENODEV) {
3784 printf("raid%d: get cache for component %s failed\n",
3785 raidPtr->raidid,
3786 raidPtr->Disks[c].devname);
3787 }
3788
3789 return error;
3790 }
3791
3792 if (c == 0)
3793 dkwhole = dkpart;
3794 else
3795 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3796 }
3797 }
3798
3799 *data = dkwhole;
3800
3801 return 0;
3802 }
3803
3804 /*
3805 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3806 * We end up returning whatever error was returned by the first cache flush
3807 * that fails.
3808 */
3809
3810 static int
3811 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3812 {
3813 int e = 0;
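	/* Retry the flush a few times; success or ENODEV (no cache to
	 * sync) ends the loop early. */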
3814 for (int i = 0; i < 5; i++) {
3815 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3816 &force, FWRITE, NOCRED);
3817 if (!e || e == ENODEV)
3818 return e;
3819 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3820 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3821 }
3822 return e;
3823 }
3824
3825 int
3826 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3827 {
3828 int c, error;
3829
3830 error = 0;
3831 for (c = 0; c < raidPtr->numCol; c++) {
3832 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3833 int e = rf_sync_component_cache(raidPtr, c, force);
3834 if (e && !error)
3835 error = e;
3836 }
3837 }
3838
3839 for (c = 0; c < raidPtr->numSpare ; c++) {
3840 int sparecol = raidPtr->numCol + c;
3841 /* Need to ensure that the reconstruct actually completed! */
3842 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3843 int e = rf_sync_component_cache(raidPtr, sparecol,
3844 force);
3845 if (e && !error)
3846 error = e;
3847 }
3848 }
3849 return error;
3850 }
3851
3852 /* Fill in info with the current status */
3853 void
3854 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3855 {
3856
3857 memset(info, 0, sizeof(*info));
3858
3859 if (raidPtr->status != rf_rs_reconstructing) {
3860 info->total = 100;
3861 info->completed = 100;
3862 } else {
3863 info->total = raidPtr->reconControl->numRUsTotal;
3864 info->completed = raidPtr->reconControl->numRUsComplete;
3865 }
3866 info->remaining = info->total - info->completed;
3867 }
3868
3869 /* Fill in info with the current status */
3870 void
3871 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3872 {
3873
3874 memset(info, 0, sizeof(*info));
3875
3876 if (raidPtr->parity_rewrite_in_progress == 1) {
3877 info->total = raidPtr->Layout.numStripe;
3878 info->completed = raidPtr->parity_rewrite_stripes_done;
3879 } else {
3880 info->completed = 100;
3881 info->total = 100;
3882 }
3883 info->remaining = info->total - info->completed;
3884 }
3885
3886 /* Fill in info with the current status */
3887 void
3888 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3889 {
3890
3891 memset(info, 0, sizeof(*info));
3892
3893 if (raidPtr->copyback_in_progress == 1) {
3894 info->total = raidPtr->Layout.numStripe;
3895 info->completed = raidPtr->copyback_stripes_done;
3896 info->remaining = info->total - info->completed;
3897 } else {
3898 info->remaining = 0;
3899 info->completed = 100;
3900 info->total = 100;
3901 }
3902 }
3903
3904 /* Fill in config with the current info */
3905 int
3906 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3907 {
3908 int d, i, j;
3909
3910 if (!raidPtr->valid)
3911 return ENODEV;
3912 config->cols = raidPtr->numCol;
3913 config->ndevs = raidPtr->numCol;
3914 if (config->ndevs >= RF_MAX_DISKS)
3915 return ENOMEM;
3916 config->nspares = raidPtr->numSpare;
3917 if (config->nspares >= RF_MAX_DISKS)
3918 return ENOMEM;
3919 config->maxqdepth = raidPtr->maxQueueDepth;
3920 d = 0;
3921 for (j = 0; j < config->cols; j++) {
3922 config->devs[d] = raidPtr->Disks[j];
3923 d++;
3924 }
3925 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3926 config->spares[i] = raidPtr->Disks[j];
3927 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3928 /* XXX: raidctl(8) expects to see this as a used spare */
3929 config->spares[i].status = rf_ds_used_spare;
3930 }
3931 }
3932 return 0;
3933 }
3934
3935 int
3936 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3937 {
3938 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3939 RF_ComponentLabel_t *raid_clabel;
3940 int column = clabel->column;
3941
3942 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3943 return EINVAL;
3944 raid_clabel = raidget_component_label(raidPtr, column);
3945 memcpy(clabel, raid_clabel, sizeof *clabel);
3946 /* Fix-up for userland. */
3947 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
3948 clabel->version = RF_COMPONENT_LABEL_VERSION;
3949
3950 return 0;
3951 }
3952
3953 /*
3954 * Module interface
3955 */
3956
3957 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3958
3959 #ifdef _MODULE
3960 CFDRIVER_DECL(raid, DV_DISK, NULL);
3961 #endif
3962
3963 static int raid_modcmd(modcmd_t, void *);
3964 static int raid_modcmd_init(void);
3965 static int raid_modcmd_fini(void);
3966
3967 static int
3968 raid_modcmd(modcmd_t cmd, void *data)
3969 {
3970 int error;
3971
3972 error = 0;
3973 switch (cmd) {
3974 case MODULE_CMD_INIT:
3975 error = raid_modcmd_init();
3976 break;
3977 case MODULE_CMD_FINI:
3978 error = raid_modcmd_fini();
3979 break;
3980 default:
3981 error = ENOTTY;
3982 break;
3983 }
3984 return error;
3985 }
3986
3987 static int
3988 raid_modcmd_init(void)
3989 {
3990 int error;
3991 int bmajor, cmajor;
3992
3993 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
3994 mutex_enter(&raid_lock);
3995 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3996 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
3997 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
3998 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
3999
4000 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
4001 #endif
4002
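	/* A major of -1 asks devsw_attach() to allocate the device
	 * majors dynamically. */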
4003 bmajor = cmajor = -1;
4004 error = devsw_attach("raid", &raid_bdevsw, &bmajor,
4005 &raid_cdevsw, &cmajor);
4006 if (error != 0 && error != EEXIST) {
4007 aprint_error("%s: devsw_attach failed %d\n", __func__, error);
4008 mutex_exit(&raid_lock);
4009 return error;
4010 }
4011 #ifdef _MODULE
4012 error = config_cfdriver_attach(&raid_cd);
4013 if (error != 0) {
4014 aprint_error("%s: config_cfdriver_attach failed %d\n",
4015 __func__, error);
4016 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4017 mutex_exit(&raid_lock);
4018 return error;
4019 }
4020 #endif
4021 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4022 if (error != 0) {
4023 aprint_error("%s: config_cfattach_attach failed %d\n",
4024 __func__, error);
4025 #ifdef _MODULE
4026 config_cfdriver_detach(&raid_cd);
4027 #endif
4028 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4029 mutex_exit(&raid_lock);
4030 return error;
4031 }
4032
4033 raidautoconfigdone = false;
4034
4035 mutex_exit(&raid_lock);
4036
4037 if (error == 0) {
4038 if (rf_BootRaidframe(true) == 0)
4039 aprint_verbose("Kernelized RAIDframe activated\n");
4040 else
4041 panic("Serious error activating RAID!!");
4042 }
4043
4044 /*
4045 * Register a finalizer which will be used to auto-config RAID
4046 * sets once all real hardware devices have been found.
4047 */
4048 error = config_finalize_register(NULL, rf_autoconfig);
4049 if (error != 0) {
4050 aprint_error("WARNING: unable to register RAIDframe "
4051 "finalizer\n");
4052 error = 0;
4053 }
4054
4055 return error;
4056 }
4057
4058 static int
4059 raid_modcmd_fini(void)
4060 {
4061 int error;
4062
4063 mutex_enter(&raid_lock);
4064
4065 /* Don't allow unload if raid device(s) exist. */
4066 if (!LIST_EMPTY(&raids)) {
4067 mutex_exit(&raid_lock);
4068 return EBUSY;
4069 }
4070
4071 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
4072 if (error != 0) {
4073 aprint_error("%s: cannot detach cfattach\n",__func__);
4074 mutex_exit(&raid_lock);
4075 return error;
4076 }
4077 #ifdef _MODULE
4078 error = config_cfdriver_detach(&raid_cd);
4079 if (error != 0) {
4080 aprint_error("%s: cannot detach cfdriver\n",__func__);
4081 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4082 mutex_exit(&raid_lock);
4083 return error;
4084 }
4085 #endif
4086 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4087 rf_BootRaidframe(false);
4088 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
4089 rf_destroy_mutex2(rf_sparet_wait_mutex);
4090 rf_destroy_cond2(rf_sparet_wait_cv);
4091 rf_destroy_cond2(rf_sparet_resp_cv);
4092 #endif
4093 mutex_exit(&raid_lock);
4094 mutex_destroy(&raid_lock);
4095
4096 return error;
4097 }
4098