rf_netbsdkintf.c revision 1.414 1 /* $NetBSD: rf_netbsdkintf.c,v 1.414 2023/09/17 20:07:39 oster Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.414 2023/09/17 20:07:39 oster Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "ioconf.h"
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
173
174 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf *);
178 static void InitBP(struct buf *, struct vnode *, unsigned,
179 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
180 void *, int);
181 static void raidinit(struct raid_softc *);
182 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
183 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
184
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199 static int raid_diskstart(device_t, struct buf *bp);
200 static int raid_dumpblocks(device_t, void *, daddr_t, int);
201 static int raid_lastclose(device_t);
202
203 static dev_type_open(raidopen);
204 static dev_type_close(raidclose);
205 static dev_type_read(raidread);
206 static dev_type_write(raidwrite);
207 static dev_type_ioctl(raidioctl);
208 static dev_type_strategy(raidstrategy);
209 static dev_type_dump(raiddump);
210 static dev_type_size(raidsize);
211
/* Block device switch: entry points for the raid* block devices. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
222
/* Character device switch: raw-device entry points for raid* devices. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
237
/* Hooks handed to the common dk(9) disk framework for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,		/* start one queued buf */
	.d_dumpblocks = raid_dumpblocks,	/* crash-dump support */
	.d_lastclose = raid_lastclose,		/* final close of the device */
	.d_minphys = minphys
};
247
248 #define raidunit(x) DISKUNIT(x)
249 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
250
251 extern struct cfdriver raid_cd;
252 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
253 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
254 DVF_DETACH_SHUTDOWN);
255
/*
 * Internal representation of a rf_recon_req: a kernel-owned copy of the
 * user's reconstruction request, safe to hand off to a recon thread
 * after the user's buffer has gone away.
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column the request applies to */
	RF_ReconReqFlags_t flags;	/* request flags copied from userland */
	void *raidPtr;			/* the RF_Raid_t * this belongs to */
};
262
263 /*
264 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
265 * Be aware that large numbers can allow the driver to consume a lot of
266 * kernel memory, especially on writes, and in degraded mode reads.
267 *
268 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
269 * a single 64K write will typically require 64K for the old data,
270 * 64K for the old parity, and 64K for the new parity, for a total
271 * of 192K (if the parity buffer is not re-used immediately).
272  * Even if it is used immediately, that's still 128K, which when multiplied
273 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
274 *
275 * Now in degraded mode, for example, a 64K read on the above setup may
276 * require data reconstruction, which will require *all* of the 4 remaining
277 * disks to participate -- 4 * 32K/disk == 128K again.
278 */
279
280 #ifndef RAIDOUTSTANDING
281 #define RAIDOUTSTANDING 6
282 #endif
283
284 #define RAIDLABELDEV(dev) \
285 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
286
287 /* declared here, and made public, for the benefit of KVM stuff.. */
288
289 static int raidlock(struct raid_softc *);
290 static void raidunlock(struct raid_softc *);
291
292 static int raid_detach_unlocked(struct raid_softc *);
293
294 static void rf_markalldirty(RF_Raid_t *);
295 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
296
297 static void rf_ReconThread(struct rf_recon_req_internal *);
298 static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
299 static void rf_CopybackThread(RF_Raid_t *raidPtr);
300 static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
301 static int rf_autoconfig(device_t);
302 static int rf_rescan(void);
303 static void rf_buildroothack(RF_ConfigSet_t *);
304
305 static RF_AutoConfig_t *rf_find_raid_components(void);
306 static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
307 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
308 static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
309 static int rf_set_autoconfig(RF_Raid_t *, int);
310 static int rf_set_rootpartition(RF_Raid_t *, int);
311 static void rf_release_all_vps(RF_ConfigSet_t *);
312 static void rf_cleanup_config_set(RF_ConfigSet_t *);
313 static int rf_have_enough_components(RF_ConfigSet_t *);
314 static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
315 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
316
317 /*
318 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
319 * Note that this is overridden by having RAID_AUTOCONFIG as an option
320 * in the kernel config file.
321 */
322 #ifdef RAID_AUTOCONFIG
323 int raidautoconfig = 1;
324 #else
325 int raidautoconfig = 0;
326 #endif
327 static bool raidautoconfigdone = false;
328
329 struct pool rf_alloclist_pool; /* AllocList */
330
331 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
332 static kmutex_t raid_lock;
333
334 static struct raid_softc *
335 raidcreate(int unit) {
336 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
337 sc->sc_unit = unit;
338 cv_init(&sc->sc_cv, "raidunit");
339 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
340 return sc;
341 }
342
343 static void
344 raiddestroy(struct raid_softc *sc) {
345 cv_destroy(&sc->sc_cv);
346 mutex_destroy(&sc->sc_mutex);
347 kmem_free(sc, sizeof(*sc));
348 }
349
350 static struct raid_softc *
351 raidget(int unit, bool create) {
352 struct raid_softc *sc;
353 if (unit < 0) {
354 #ifdef DIAGNOSTIC
355 panic("%s: unit %d!", __func__, unit);
356 #endif
357 return NULL;
358 }
359 mutex_enter(&raid_lock);
360 LIST_FOREACH(sc, &raids, sc_link) {
361 if (sc->sc_unit == unit) {
362 mutex_exit(&raid_lock);
363 return sc;
364 }
365 }
366 mutex_exit(&raid_lock);
367 if (!create)
368 return NULL;
369 sc = raidcreate(unit);
370 mutex_enter(&raid_lock);
371 LIST_INSERT_HEAD(&raids, sc, sc_link);
372 mutex_exit(&raid_lock);
373 return sc;
374 }
375
376 static void
377 raidput(struct raid_softc *sc) {
378 mutex_enter(&raid_lock);
379 LIST_REMOVE(sc, sc_link);
380 mutex_exit(&raid_lock);
381 raiddestroy(sc);
382 }
383
/*
 * Historical pseudo-device attach entry point; `num' is ignored.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
393
/*
 * One-shot autoconfiguration of RAID sets at boot.
 *
 * Does nothing unless `raidautoconfig' is set and this is the first
 * call.  Scans all disks for RAIDframe component labels, groups them
 * into sets, and configures the eligible sets (including deciding the
 * root device) via rf_buildroothack().  Returns 1 if a scan was
 * performed, 0 if autoconfig was skipped.
 */
static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
431
432 int
433 rf_inited(const struct raid_softc *rs) {
434 return (rs->sc_flags & RAIDF_INITED) != 0;
435 }
436
437 RF_Raid_t *
438 rf_get_raid(struct raid_softc *rs) {
439 return &rs->sc_r;
440 }
441
442 int
443 rf_get_unit(const struct raid_softc *rs) {
444 return rs->sc_unit;
445 }
446
/*
 * Return 1 if RAID set `r' has the boot device `bdv' among its
 * components, 0 otherwise.  Wedge (dkN) components are translated to
 * their parent device name before comparing.
 *
 * NOTE(review): the comparison is strncmp() over strlen(bootname),
 * i.e. a prefix match -- "wd1" would also match a component living on
 * "wd10".  Presumably benign in practice; confirm before tightening.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the "/dev/" prefix stored in the component name */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge: compare against the parent disk instead */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
477
478 static int
479 rf_rescan(void)
480 {
481 RF_AutoConfig_t *ac_list;
482 RF_ConfigSet_t *config_sets, *cset, *next_cset;
483 struct raid_softc *sc;
484 int raid_added;
485
486 ac_list = rf_find_raid_components();
487 config_sets = rf_create_auto_sets(ac_list);
488
489 raid_added = 1;
490 while (raid_added > 0) {
491 raid_added = 0;
492 cset = config_sets;
493 while (cset != NULL) {
494 next_cset = cset->next;
495 if (rf_have_enough_components(cset) &&
496 cset->ac->clabel->autoconfigure == 1) {
497 sc = rf_auto_config_set(cset);
498 if (sc != NULL) {
499 aprint_debug("raid%d: configured ok, rootable %d\n",
500 sc->sc_unit, cset->rootable);
501 /* We added one RAID set */
502 raid_added++;
503 } else {
504 /* The autoconfig didn't work :( */
505 aprint_debug("Autoconfig failed\n");
506 rf_release_all_vps(cset);
507 }
508 } else {
509 /* we're not autoconfiguring this set...
510 release the associated resources */
511 rf_release_all_vps(cset);
512 }
513 /* cleanup */
514 rf_cleanup_config_set(cset);
515 cset = next_cset;
516 }
517 if (raid_added > 0) {
518 /* We added at least one RAID set, so re-scan for recursive RAID */
519 ac_list = rf_find_raid_components();
520 config_sets = rf_create_auto_sets(ac_list);
521 }
522 }
523
524 return 0;
525 }
526
527
/*
 * Configure all eligible autoconfig sets and, if permitted, pick a
 * configured RAID set as the root device.
 *
 * The scan loop mirrors rf_rescan(): keep configuring sets and
 * re-scanning until no new set appears, so stacked RAID works.  Sets
 * whose components mark them rootable are counted; with exactly one
 * candidate we may override booted_device, with several we try to match
 * the set that physically contains the boot device.
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* number of rootable sets found */
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		aprint_debug("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			aprint_debug("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			aprint_debug("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		aprint_debug("%s: candidate root=%p booted_device=%p "
		    "root_partition=%d contains_boot=%d\n",
		    __func__, candidate_root, booted_device,
		    rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			aprint_debug("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		aprint_debug("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* count only the rootable sets that contain the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
686
687 static int
688 raidsize(dev_t dev)
689 {
690 struct raid_softc *rs;
691 struct dk_softc *dksc;
692 unsigned int unit;
693
694 unit = raidunit(dev);
695 if ((rs = raidget(unit, false)) == NULL)
696 return -1;
697 dksc = &rs->sc_dksc;
698
699 if ((rs->sc_flags & RAIDF_INITED) == 0)
700 return -1;
701
702 return dk_size(dksc, dev);
703 }
704
705 static int
706 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
707 {
708 unsigned int unit;
709 struct raid_softc *rs;
710 struct dk_softc *dksc;
711
712 unit = raidunit(dev);
713 if ((rs = raidget(unit, false)) == NULL)
714 return ENXIO;
715 dksc = &rs->sc_dksc;
716
717 if ((rs->sc_flags & RAIDF_INITED) == 0)
718 return ENODEV;
719
720 /*
721 Note that blkno is relative to this particular partition.
722 By adding adding RF_PROTECTED_SECTORS, we get a value that
723 is relative to the partition used for the underlying component.
724 */
725 blkno += RF_PROTECTED_SECTORS;
726
727 return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
728 }
729
/*
 * dk(9) d_dumpblocks hook: write `nblk' blocks of crash-dump data at
 * `blkno' directly to one live component of a RAID 1 set.  Only RAID 1
 * (one data + one parity column) is supported.  Returns 0 on success or
 * an errno (EINVAL if no usable component exists).
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;

		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column it spares. */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* dump through the chosen component's block device */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
836
/*
 * d_open entry point.  Creates the softc on first reference (raidget
 * with create=true), refuses opens while a shutdown is pending, marks
 * all components dirty on the first open of a configured set, and then
 * defers to dk_open().
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	/* device is going away; don't allow new opens */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	/* only a configured set can actually be opened */
	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;


}
886
887 static int
888 raid_lastclose(device_t self)
889 {
890 struct raid_softc *rs = raidsoftc(self);
891
892 /* Last one... device is not unconfigured yet.
893 Device shutdown has taken care of setting the
894 clean bits if RAIDF_INITED is not set
895 mark things as clean... */
896
897 rf_update_component_labels(&rs->sc_r,
898 RF_FINAL_COMPONENT_UPDATE);
899
900 /* pass to unlocked code */
901 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
902 rs->sc_flags |= RAIDF_DETACH;
903
904 return 0;
905 }
906
/*
 * d_close entry point.  Closes through dk_close() and then, outside the
 * softc lock, either detaches the pseudo-device (RAIDF_DETACH, set by
 * raid_lastclose()) or drops an unconfigured softc that is shutting
 * down (RAIDF_SHUTDOWN).
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have requested a detach */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;

}
946
/*
 * Signal iodone_cv (under iodone_lock) so whoever waits on it --
 * the RAIDframe engine, not visible in this file -- picks up newly
 * queued work.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
954
955 static void
956 raidstrategy(struct buf *bp)
957 {
958 unsigned int unit;
959 struct raid_softc *rs;
960 struct dk_softc *dksc;
961 RF_Raid_t *raidPtr;
962
963 unit = raidunit(bp->b_dev);
964 if ((rs = raidget(unit, false)) == NULL) {
965 bp->b_error = ENXIO;
966 goto fail;
967 }
968 if ((rs->sc_flags & RAIDF_INITED) == 0) {
969 bp->b_error = ENXIO;
970 goto fail;
971 }
972 dksc = &rs->sc_dksc;
973 raidPtr = &rs->sc_r;
974
975 /* Queue IO only */
976 if (dk_strategy_defer(dksc, bp))
977 goto done;
978
979 /* schedule the IO to happen at the next convenient time */
980 raid_wakeup(raidPtr);
981
982 done:
983 return;
984
985 fail:
986 bp->b_resid = bp->b_bcount;
987 biodone(bp);
988 }
989
990 static int
991 raid_diskstart(device_t dev, struct buf *bp)
992 {
993 struct raid_softc *rs = raidsoftc(dev);
994 RF_Raid_t *raidPtr;
995
996 raidPtr = &rs->sc_r;
997 if (!raidPtr->valid) {
998 db1_printf(("raid is not valid..\n"));
999 return ENODEV;
1000 }
1001
1002 /* XXX */
1003 bp->b_resid = 0;
1004
1005 return raiddoaccess(raidPtr, bp);
1006 }
1007
/*
 * Completion path for a RAIDframe I/O: report the buffer done to the
 * dk(9) layer, return the "opening" (outstanding-request slot) this
 * access consumed, and wake the engine so further I/O is scheduled.
 */
void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	/* give back the request slot */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}
1026
1027 /* ARGSUSED */
1028 static int
1029 raidread(dev_t dev, struct uio *uio, int flags)
1030 {
1031 int unit = raidunit(dev);
1032 struct raid_softc *rs;
1033
1034 if ((rs = raidget(unit, false)) == NULL)
1035 return ENXIO;
1036
1037 if ((rs->sc_flags & RAIDF_INITED) == 0)
1038 return ENXIO;
1039
1040 return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
1041
1042 }
1043
1044 /* ARGSUSED */
1045 static int
1046 raidwrite(dev_t dev, struct uio *uio, int flags)
1047 {
1048 int unit = raidunit(dev);
1049 struct raid_softc *rs;
1050
1051 if ((rs = raidget(unit, false)) == NULL)
1052 return ENXIO;
1053
1054 if ((rs->sc_flags & RAIDF_INITED) == 0)
1055 return ENXIO;
1056
1057 return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
1058
1059 }
1060
/*
 * Tear down a configured RAID set; caller holds the softc lock.
 * Refuses (EBUSY) while the device is open or a reconstruction,
 * parity rewrite, or copyback is in progress.  Shuts down RAIDframe,
 * drains and frees the buffer queue, and detaches the disk from the
 * dk(9)/disk(9) framework.  Returns 0 on success or an errno.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* busy: open users or background operations in flight */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* nothing configured: nothing to tear down */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1098
/*
 * Mark component rr->col as failed and start a reconstruction thread
 * for it (RAIDFRAME_FAIL_DISK).
 *
 * Returns EINVAL for RAID 0 sets, out-of-range columns, or states where
 * failing the component would leave the set unrecoverable; ENOMEM if
 * the request copy cannot be allocated; otherwise the result of
 * RF_CREATE_THREAD().
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* Flag the in-progress reconstruction of this column
		   for abort before failing the component. */
		raidPtr->abortRecon[rr->col] = 1;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		int spareCol = raidPtr->Disks[rr->col].spareCol;

		/* Valid spare columns live in [numCol, numCol + numSpare). */
		if (spareCol < raidPtr->numCol ||
		    spareCol >= raidPtr->numCol + raidPtr->numSpare)
			goto out;

		/*
		 * Fail the spare disk so that we can
		 * reconstruct on another one.
		 */
		raidPtr->Disks[spareCol].status = rf_ds_failed;

	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1155
1156 static int
1157 rf_copyinspecificbuf(RF_Config_t *k_cfg)
1158 {
1159 /* allocate a buffer for the layout-specific data, and copy it in */
1160 if (k_cfg->layoutSpecificSize == 0)
1161 return 0;
1162
1163 if (k_cfg->layoutSpecificSize > 10000) {
1164 /* sanity check */
1165 return EINVAL;
1166 }
1167
1168 u_char *specific_buf;
1169 specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1170 if (specific_buf == NULL)
1171 return ENOMEM;
1172
1173 int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1174 k_cfg->layoutSpecificSize);
1175 if (retcode) {
1176 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1177 db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1178 return retcode;
1179 }
1180
1181 k_cfg->layoutSpecific = specific_buf;
1182 return 0;
1183 }
1184
1185 static int
1186 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1187 {
1188 RF_Config_t *u_cfg = *((RF_Config_t **) data);
1189
1190 if (rs->sc_r.valid) {
1191 /* There is a valid RAID set running on this unit! */
1192 printf("raid%d: Device already configured!\n", rs->sc_unit);
1193 return EINVAL;
1194 }
1195
1196 /* copy-in the configuration information */
1197 /* data points to a pointer to the configuration structure */
1198 *k_cfg = RF_Malloc(sizeof(**k_cfg));
1199 if (*k_cfg == NULL) {
1200 return ENOMEM;
1201 }
1202 int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1203 if (retcode == 0)
1204 return 0;
1205 RF_Free(*k_cfg, sizeof(RF_Config_t));
1206 db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1207 rs->sc_flags |= RAIDF_SHUTDOWN;
1208 return retcode;
1209 }
1210
1211 int
1212 rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
1213 {
1214 int retcode, i;
1215 RF_Raid_t *raidPtr = &rs->sc_r;
1216
1217 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1218
1219 if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
1220 goto out;
1221
1222 /* should do some kind of sanity check on the configuration.
1223 * Store the sum of all the bytes in the last byte? */
1224
1225 /* Force nul-termination on all strings. */
1226 #define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
1227 for (i = 0; i < RF_MAXCOL; i++) {
1228 ZERO_FINAL(k_cfg->devnames[0][i]);
1229 }
1230 for (i = 0; i < RF_MAXSPARE; i++) {
1231 ZERO_FINAL(k_cfg->spare_names[i]);
1232 }
1233 for (i = 0; i < RF_MAXDBGV; i++) {
1234 ZERO_FINAL(k_cfg->debugVars[i]);
1235 }
1236 #undef ZERO_FINAL
1237
1238 /* Check some basic limits. */
1239 if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
1240 retcode = EINVAL;
1241 goto out;
1242 }
1243 if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
1244 retcode = EINVAL;
1245 goto out;
1246 }
1247
1248 /* configure the system */
1249
1250 /*
1251 * Clear the entire RAID descriptor, just to make sure
1252 * there is no stale data left in the case of a
1253 * reconfiguration
1254 */
1255 memset(raidPtr, 0, sizeof(*raidPtr));
1256 raidPtr->softc = rs;
1257 raidPtr->raidid = rs->sc_unit;
1258
1259 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1260
1261 if (retcode == 0) {
1262 /* allow this many simultaneous IO's to
1263 this RAID device */
1264 raidPtr->openings = RAIDOUTSTANDING;
1265
1266 raidinit(rs);
1267 raid_wakeup(raidPtr);
1268 rf_markalldirty(raidPtr);
1269 }
1270
1271 /* free the buffers. No return code here. */
1272 if (k_cfg->layoutSpecificSize) {
1273 RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
1274 }
1275 out:
1276 RF_Free(k_cfg, sizeof(RF_Config_t));
1277 if (retcode) {
1278 /*
1279 * If configuration failed, set sc_flags so that we
1280 * will detach the device when we close it.
1281 */
1282 rs->sc_flags |= RAIDF_SHUTDOWN;
1283 }
1284 return retcode;
1285 }
1286
#if RF_DISABLED
/*
 * Copy a user-supplied component label over the in-core label for the
 * given column and flush it to disk.  Currently compiled out
 * (RF_DISABLED) because arbitrary label edits are unsafe; see the XXX
 * notes below.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	/* Rows are not supported; force row 0. */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1325
1326 static int
1327 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1328 {
1329 /*
1330 we only want the serial number from
1331 the above. We get all the rest of the information
1332 from the config that was used to create this RAID
1333 set.
1334 */
1335
1336 raidPtr->serial_number = clabel->serial_number;
1337
1338 for (int column = 0; column < raidPtr->numCol; column++) {
1339 RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1340 if (RF_DEAD_DISK(diskPtr->status))
1341 continue;
1342 RF_ComponentLabel_t *ci_label = raidget_component_label(
1343 raidPtr, column);
1344 /* Zeroing this is important. */
1345 memset(ci_label, 0, sizeof(*ci_label));
1346 raid_init_component_label(raidPtr, ci_label);
1347 ci_label->serial_number = raidPtr->serial_number;
1348 ci_label->row = 0; /* we dont' pretend to support more */
1349 rf_component_label_set_partitionsize(ci_label,
1350 diskPtr->partitionSize);
1351 ci_label->column = column;
1352 raidflush_component_label(raidPtr, column);
1353 /* XXXjld what about the spares? */
1354 }
1355
1356 return 0;
1357 }
1358
/*
 * Rebuild a component in place (RAIDFRAME_REBUILD_IN_PLACE): start a
 * reconstruction thread that writes the rebuilt data back onto the
 * same column rather than onto a spare.
 *
 * Returns EINVAL for RAID 0 sets, a reconstruction already running,
 * bad columns, or component states that forbid the rebuild; ENOMEM on
 * allocation failure; otherwise the result of RF_CREATE_THREAD().
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Work on a local copy; the caller's buffer is not trusted
	 * beyond this point. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* The status checks below must be made under the mutex. */
	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		       raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:    Col: %d   "
		       "Reconstruction already occurring!\n",
		       raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* Copy the request for the reconstruction thread; it outlives
	 * this ioctl. */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1426
1427 static int
1428 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1429 {
1430 /*
1431 * This makes no sense on a RAID 0, or if we are not reconstructing
1432 * so tell the user it's done.
1433 */
1434 if (raidPtr->Layout.map->faultsTolerated == 0 ||
1435 raidPtr->status != rf_rs_reconstructing) {
1436 *data = 100;
1437 return 0;
1438 }
1439 if (raidPtr->reconControl->numRUsTotal == 0) {
1440 *data = 0;
1441 return 0;
1442 }
1443 *data = (raidPtr->reconControl->numRUsComplete * 100
1444 / raidPtr->reconControl->numRUsTotal);
1445 return 0;
1446 }
1447
1448 /*
1449 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
1450 * on the component_name[] array.
1451 */
1452 static void
1453 rf_copy_single_component(RF_SingleComponent_t *component, void *data)
1454 {
1455
1456 memcpy(component, data, sizeof *component);
1457 component->component_name[sizeof(component->component_name) - 1] = '\0';
1458 }
1459
1460 static int
1461 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1462 {
1463 int unit = raidunit(dev);
1464 int part, pmask;
1465 struct raid_softc *rs;
1466 struct dk_softc *dksc;
1467 RF_Config_t *k_cfg;
1468 RF_Raid_t *raidPtr;
1469 RF_AccTotals_t *totals;
1470 RF_SingleComponent_t component;
1471 RF_DeviceConfig_t *d_cfg, *ucfgp;
1472 int retcode = 0;
1473 int column;
1474 RF_ComponentLabel_t *clabel;
1475 int d;
1476
1477 if ((rs = raidget(unit, false)) == NULL)
1478 return ENXIO;
1479
1480 dksc = &rs->sc_dksc;
1481 raidPtr = &rs->sc_r;
1482
1483 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1484 (int) DISKPART(dev), (int) unit, cmd));
1485
1486 /* Only CONFIGURE and RESCAN can be done without the RAID being initialized. */
1487 switch (cmd) {
1488 case RAIDFRAME_CONFIGURE:
1489 case RAIDFRAME_RESCAN:
1490 break;
1491 default:
1492 if (!rf_inited(rs))
1493 return ENXIO;
1494 }
1495
1496 switch (cmd) {
1497 /* configure the system */
1498 case RAIDFRAME_CONFIGURE:
1499 if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1500 return retcode;
1501 return rf_construct(rs, k_cfg);
1502
1503 /* shutdown the system */
1504 case RAIDFRAME_SHUTDOWN:
1505
1506 part = DISKPART(dev);
1507 pmask = (1 << part);
1508
1509 if ((retcode = raidlock(rs)) != 0)
1510 return retcode;
1511
1512 if (DK_BUSY(dksc, pmask) ||
1513 raidPtr->recon_in_progress != 0 ||
1514 raidPtr->parity_rewrite_in_progress != 0 ||
1515 raidPtr->copyback_in_progress != 0)
1516 retcode = EBUSY;
1517 else {
1518 /* detach and free on close */
1519 rs->sc_flags |= RAIDF_SHUTDOWN;
1520 retcode = 0;
1521 }
1522
1523 raidunlock(rs);
1524
1525 return retcode;
1526 case RAIDFRAME_GET_COMPONENT_LABEL:
1527 return rf_get_component_label(raidPtr, data);
1528
1529 #if RF_DISABLED
1530 case RAIDFRAME_SET_COMPONENT_LABEL:
1531 return rf_set_component_label(raidPtr, data);
1532 #endif
1533
1534 case RAIDFRAME_INIT_LABELS:
1535 return rf_init_component_label(raidPtr, data);
1536
1537 case RAIDFRAME_SET_AUTOCONFIG:
1538 d = rf_set_autoconfig(raidPtr, *(int *) data);
1539 printf("raid%d: New autoconfig value is: %d\n",
1540 raidPtr->raidid, d);
1541 *(int *) data = d;
1542 return retcode;
1543
1544 case RAIDFRAME_SET_ROOT:
1545 d = rf_set_rootpartition(raidPtr, *(int *) data);
1546 printf("raid%d: New rootpartition value is: %d\n",
1547 raidPtr->raidid, d);
1548 *(int *) data = d;
1549 return retcode;
1550
1551 /* initialize all parity */
1552 case RAIDFRAME_REWRITEPARITY:
1553
1554 if (raidPtr->Layout.map->faultsTolerated == 0) {
1555 /* Parity for RAID 0 is trivially correct */
1556 raidPtr->parity_good = RF_RAID_CLEAN;
1557 return 0;
1558 }
1559
1560 if (raidPtr->parity_rewrite_in_progress == 1) {
1561 /* Re-write is already in progress! */
1562 return EINVAL;
1563 }
1564
1565 return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1566 rf_RewriteParityThread, raidPtr,"raid_parity");
1567
1568 case RAIDFRAME_ADD_HOT_SPARE:
1569 rf_copy_single_component(&component, data);
1570 return rf_add_hot_spare(raidPtr, &component);
1571
1572 /* Remove a non hot-spare component, never implemented in userland */
1573 case RAIDFRAME_DELETE_COMPONENT:
1574 rf_copy_single_component(&component, data);
1575 return rf_delete_component(raidPtr, &component);
1576
1577 case RAIDFRAME_REMOVE_COMPONENT:
1578 rf_copy_single_component(&component, data);
1579 return rf_remove_component(raidPtr, &component);
1580
1581 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1582 rf_copy_single_component(&component, data);
1583 return rf_incorporate_hot_spare(raidPtr, &component);
1584
1585 case RAIDFRAME_REBUILD_IN_PLACE:
1586 return rf_rebuild_in_place(raidPtr, data);
1587
1588 case RAIDFRAME_GET_INFO:
1589 ucfgp = *(RF_DeviceConfig_t **)data;
1590 d_cfg = RF_Malloc(sizeof(*d_cfg));
1591 if (d_cfg == NULL)
1592 return ENOMEM;
1593 retcode = rf_get_info(raidPtr, d_cfg);
1594 if (retcode == 0) {
1595 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1596 }
1597 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1598 return retcode;
1599
1600 case RAIDFRAME_CHECK_PARITY:
1601 *(int *) data = raidPtr->parity_good;
1602 return 0;
1603
1604 case RAIDFRAME_PARITYMAP_STATUS:
1605 if (rf_paritymap_ineligible(raidPtr))
1606 return EINVAL;
1607 rf_paritymap_status(raidPtr->parity_map, data);
1608 return 0;
1609
1610 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1611 if (rf_paritymap_ineligible(raidPtr))
1612 return EINVAL;
1613 if (raidPtr->parity_map == NULL)
1614 return ENOENT; /* ??? */
1615 if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1616 return EINVAL;
1617 return 0;
1618
1619 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1620 if (rf_paritymap_ineligible(raidPtr))
1621 return EINVAL;
1622 *(int *) data = rf_paritymap_get_disable(raidPtr);
1623 return 0;
1624
1625 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1626 if (rf_paritymap_ineligible(raidPtr))
1627 return EINVAL;
1628 rf_paritymap_set_disable(raidPtr, *(int *)data);
1629 /* XXX should errors be passed up? */
1630 return 0;
1631
1632 case RAIDFRAME_RESCAN:
1633 return rf_rescan();
1634
1635 case RAIDFRAME_RESET_ACCTOTALS:
1636 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1637 return 0;
1638
1639 case RAIDFRAME_GET_ACCTOTALS:
1640 totals = (RF_AccTotals_t *) data;
1641 *totals = raidPtr->acc_totals;
1642 return 0;
1643
1644 case RAIDFRAME_KEEP_ACCTOTALS:
1645 raidPtr->keep_acc_totals = *(int *)data;
1646 return 0;
1647
1648 case RAIDFRAME_GET_SIZE:
1649 *(int *) data = raidPtr->totalSectors;
1650 return 0;
1651
1652 case RAIDFRAME_FAIL_DISK:
1653 return rf_fail_disk(raidPtr, data);
1654
1655 /* invoke a copyback operation after recon on whatever disk
1656 * needs it, if any */
1657 case RAIDFRAME_COPYBACK:
1658
1659 if (raidPtr->Layout.map->faultsTolerated == 0) {
1660 /* This makes no sense on a RAID 0!! */
1661 return EINVAL;
1662 }
1663
1664 if (raidPtr->copyback_in_progress == 1) {
1665 /* Copyback is already in progress! */
1666 return EINVAL;
1667 }
1668
1669 return RF_CREATE_THREAD(raidPtr->copyback_thread,
1670 rf_CopybackThread, raidPtr, "raid_copyback");
1671
1672 /* return the percentage completion of reconstruction */
1673 case RAIDFRAME_CHECK_RECON_STATUS:
1674 return rf_check_recon_status(raidPtr, data);
1675
1676 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1677 rf_check_recon_status_ext(raidPtr, data);
1678 return 0;
1679
1680 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1681 if (raidPtr->Layout.map->faultsTolerated == 0) {
1682 /* This makes no sense on a RAID 0, so tell the
1683 user it's done. */
1684 *(int *) data = 100;
1685 return 0;
1686 }
1687 if (raidPtr->parity_rewrite_in_progress == 1) {
1688 *(int *) data = 100 *
1689 raidPtr->parity_rewrite_stripes_done /
1690 raidPtr->Layout.numStripe;
1691 } else {
1692 *(int *) data = 100;
1693 }
1694 return 0;
1695
1696 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1697 rf_check_parityrewrite_status_ext(raidPtr, data);
1698 return 0;
1699
1700 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1701 if (raidPtr->Layout.map->faultsTolerated == 0) {
1702 /* This makes no sense on a RAID 0 */
1703 *(int *) data = 100;
1704 return 0;
1705 }
1706 if (raidPtr->copyback_in_progress == 1) {
1707 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1708 raidPtr->Layout.numStripe;
1709 } else {
1710 *(int *) data = 100;
1711 }
1712 return 0;
1713
1714 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1715 rf_check_copyback_status_ext(raidPtr, data);
1716 return 0;
1717
1718 case RAIDFRAME_SET_LAST_UNIT:
1719 for (column = 0; column < raidPtr->numCol; column++)
1720 if (raidPtr->Disks[column].status != rf_ds_optimal)
1721 return EBUSY;
1722
1723 for (column = 0; column < raidPtr->numCol; column++) {
1724 clabel = raidget_component_label(raidPtr, column);
1725 clabel->last_unit = *(int *)data;
1726 raidflush_component_label(raidPtr, column);
1727 }
1728 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1729 return 0;
1730
1731 /* the sparetable daemon calls this to wait for the kernel to
1732 * need a spare table. this ioctl does not return until a
1733 * spare table is needed. XXX -- calling mpsleep here in the
1734 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1735 * -- I should either compute the spare table in the kernel,
1736 * or have a different -- XXX XXX -- interface (a different
1737 * character device) for delivering the table -- XXX */
1738 #if RF_DISABLED
1739 case RAIDFRAME_SPARET_WAIT:
1740 rf_lock_mutex2(rf_sparet_wait_mutex);
1741 while (!rf_sparet_wait_queue)
1742 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1743 RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1744 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1745 rf_unlock_mutex2(rf_sparet_wait_mutex);
1746
1747 /* structure assignment */
1748 *((RF_SparetWait_t *) data) = *waitreq;
1749
1750 RF_Free(waitreq, sizeof(*waitreq));
1751 return 0;
1752
1753 /* wakes up a process waiting on SPARET_WAIT and puts an error
1754 * code in it that will cause the dameon to exit */
1755 case RAIDFRAME_ABORT_SPARET_WAIT:
1756 waitreq = RF_Malloc(sizeof(*waitreq));
1757 waitreq->fcol = -1;
1758 rf_lock_mutex2(rf_sparet_wait_mutex);
1759 waitreq->next = rf_sparet_wait_queue;
1760 rf_sparet_wait_queue = waitreq;
1761 rf_broadcast_cond2(rf_sparet_wait_cv);
1762 rf_unlock_mutex2(rf_sparet_wait_mutex);
1763 return 0;
1764
1765 /* used by the spare table daemon to deliver a spare table
1766 * into the kernel */
1767 case RAIDFRAME_SEND_SPARET:
1768
1769 /* install the spare table */
1770 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1771
1772 /* respond to the requestor. the return status of the spare
1773 * table installation is passed in the "fcol" field */
1774 waitred = RF_Malloc(sizeof(*waitreq));
1775 waitreq->fcol = retcode;
1776 rf_lock_mutex2(rf_sparet_wait_mutex);
1777 waitreq->next = rf_sparet_resp_queue;
1778 rf_sparet_resp_queue = waitreq;
1779 rf_broadcast_cond2(rf_sparet_resp_cv);
1780 rf_unlock_mutex2(rf_sparet_wait_mutex);
1781
1782 return retcode;
1783 #endif
1784 default:
1785 /*
1786 * Don't bother trying to load compat modules
1787 * if it is not our ioctl. This is more efficient
1788 * and makes rump tests not depend on compat code
1789 */
1790 if (IOCGROUP(cmd) != 'r')
1791 break;
1792 #ifdef _LP64
1793 if ((l->l_proc->p_flag & PK_32) != 0) {
1794 module_autoload("compat_netbsd32_raid",
1795 MODULE_CLASS_EXEC);
1796 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1797 (rs, cmd, data), enosys(), retcode);
1798 if (retcode != EPASSTHROUGH)
1799 return retcode;
1800 }
1801 #endif
1802 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1803 MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1804 (rs, cmd, data), enosys(), retcode);
1805 if (retcode != EPASSTHROUGH)
1806 return retcode;
1807
1808 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1809 MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1810 (rs, cmd, data), enosys(), retcode);
1811 if (retcode != EPASSTHROUGH)
1812 return retcode;
1813 break; /* fall through to the os-specific code below */
1814
1815 }
1816
1817 if (!raidPtr->valid)
1818 return EINVAL;
1819
1820 /*
1821 * Add support for "regular" device ioctls here.
1822 */
1823
1824 switch (cmd) {
1825 case DIOCGCACHE:
1826 retcode = rf_get_component_caches(raidPtr, (int *)data);
1827 break;
1828
1829 case DIOCCACHESYNC:
1830 retcode = rf_sync_component_caches(raidPtr, *(int *)data);
1831 break;
1832
1833 default:
1834 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1835 break;
1836 }
1837
1838 return retcode;
1839
1840 }
1841
1842
1843 /* raidinit -- complete the rest of the initialization for the
1844 RAIDframe device. */
1845
1846
/*
 * Complete the initialization of a freshly configured RAID set:
 * attach a pseudo-device instance, hook up the dk(4)/disk(9) layers,
 * and mark the unit usable.  Called from rf_construct() after
 * rf_Configure() succeeds.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* NOTE(review): the unit is left without RAIDF_INITED
		 * set here; the caller only learns indirectly. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Discover wedges on this disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1902
1903 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1904 /* wake up the daemon & tell it to get us a spare table
1905 * XXX
1906 * the entries in the queues should be tagged with the raidPtr
1907 * so that in the extremely rare case that two recons happen at once,
1908 * we know for which device were requesting a spare table
1909 * XXX
1910 *
1911 * XXX This code is not currently used. GO
1912 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Queue the request and wake the daemon blocked in SPARET_WAIT. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Pop the daemon's response off the queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* "fcol" carries the daemon's status code. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
1936 #endif
1937
1938 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1939 * bp & passes it down.
1940 * any calls originating in the kernel must use non-blocking I/O
1941 * do some extra sanity checking to return "appropriate" error values for
1942 * certain conditions (to make some standard utilities work)
1943 *
1944 * Formerly known as: rf_DoAccessKernel
1945 */
/*
 * Push queued I/O into the RAID set: after handling any freshly
 * failed components, hand the dk(4) queue to dk_start() which calls
 * back into raiddoaccess().
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex: rf_update_component_labels() is
		 * called unlocked, then retake it for the counter. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	dk_start(dksc, NULL);
}
1972
/*
 * Issue one buffer's worth of I/O to RAIDframe.  Returns EAGAIN when
 * no openings are available (the buffer stays queued), ENOSPC when
 * the request lies outside the set or is not sector-aligned, or the
 * result of rf_DoAccess().
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int rc;

	/* No openings left: tell the caller to retry later. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		(int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* Length in full sectors, plus one partial sector if unaligned. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Reject requests past the end of the set; the "sum <" terms
	 * also catch wraparound of the unsigned arithmetic above. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Only whole-sector transfers are accepted. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening; raid_done()/the completion path is
	 * expected to return it. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    raid_addr, num_blocks,
	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
2039
2040 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2041
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf("%s: WAKEUP CALLED\n", __func__);
		queue->numOutstanding++;

		/* Complete the NOP immediately through the normal
		 * completion path. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device and point its
		 * iodone at KernelWakeupFunc(). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return 0;
}
/*
 * This is the callback function associated with an I/O invoked from
 * kernel code.
 */
/*
 * Buffer-completion (b_iodone) handler for component I/O issued by
 * rf_DispatchKernelIO().  Records the error, possibly fails the
 * component, and hands the request to the raidio thread via the
 * iodone queue.  Runs under raidPtr->iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2184
2185
2186 /*
2187 * initialize a buf structure for doing an I/O in the kernel.
2188 */
2189 static void
2190 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2191 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2192 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2193 {
2194 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2195 bp->b_oflags = 0;
2196 bp->b_cflags = 0;
2197 bp->b_bcount = numSect << logBytesPerSector;
2198 bp->b_bufsize = bp->b_bcount;
2199 bp->b_error = 0;
2200 bp->b_dev = dev;
2201 bp->b_data = bf;
2202 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2203 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2204 if (bp->b_bcount == 0) {
2205 panic("bp->b_bcount is zero in InitBP!!");
2206 }
2207 bp->b_iodone = cbFunc;
2208 bp->b_private = cbArg;
2209 }
2210
2211 /*
2212 * Wait interruptibly for an exclusive lock.
2213 *
2214 * XXX
2215 * Several drivers do this; it should be abstracted and made MP-safe.
2216 * (Hmm... where have we seen this warning before :-> GO )
2217 */
2218 static int
2219 raidlock(struct raid_softc *rs)
2220 {
2221 int error;
2222
2223 error = 0;
2224 mutex_enter(&rs->sc_mutex);
2225 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2226 rs->sc_flags |= RAIDF_WANTED;
2227 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2228 if (error != 0)
2229 goto done;
2230 }
2231 rs->sc_flags |= RAIDF_LOCKED;
2232 done:
2233 mutex_exit(&rs->sc_mutex);
2234 return error;
2235 }
2236 /*
2237 * Unlock and wake up any waiters.
2238 */
2239 static void
2240 raidunlock(struct raid_softc *rs)
2241 {
2242
2243 mutex_enter(&rs->sc_mutex);
2244 rs->sc_flags &= ~RAIDF_LOCKED;
2245 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2246 rs->sc_flags &= ~RAIDF_WANTED;
2247 cv_broadcast(&rs->sc_cv);
2248 }
2249 mutex_exit(&rs->sc_mutex);
2250 }
2251
2252
2253 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2254 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2255 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2256
/*
 * Byte offset on each component where the component info (label) lives.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2263
2264 static daddr_t
2265 rf_component_info_size(unsigned secsize)
2266 {
2267 daddr_t info_size;
2268
2269 KASSERT(secsize);
2270 if (secsize > RF_COMPONENT_INFO_SIZE)
2271 info_size = secsize;
2272 else
2273 info_size = RF_COMPONENT_INFO_SIZE;
2274
2275 return info_size;
2276 }
2277
2278 static daddr_t
2279 rf_parity_map_offset(RF_Raid_t *raidPtr)
2280 {
2281 daddr_t map_offset;
2282
2283 KASSERT(raidPtr->bytesPerSector);
2284 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2285 map_offset = raidPtr->bytesPerSector;
2286 else
2287 map_offset = RF_COMPONENT_INFO_SIZE;
2288 map_offset += rf_component_info_offset();
2289
2290 return map_offset;
2291 }
2292
2293 static daddr_t
2294 rf_parity_map_size(RF_Raid_t *raidPtr)
2295 {
2296 daddr_t map_size;
2297
2298 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2299 map_size = raidPtr->bytesPerSector;
2300 else
2301 map_size = RF_PARITY_MAP_SIZE;
2302
2303 return map_size;
2304 }
2305
2306 int
2307 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2308 {
2309 RF_ComponentLabel_t *clabel;
2310
2311 clabel = raidget_component_label(raidPtr, col);
2312 clabel->clean = RF_RAID_CLEAN;
2313 raidflush_component_label(raidPtr, col);
2314 return(0);
2315 }
2316
2317
2318 int
2319 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2320 {
2321 RF_ComponentLabel_t *clabel;
2322
2323 clabel = raidget_component_label(raidPtr, col);
2324 clabel->clean = RF_RAID_DIRTY;
2325 raidflush_component_label(raidPtr, col);
2326 return(0);
2327 }
2328
/*
 * Read the component label for column `col' from disk into the in-core
 * copy kept in raid_cinfo.  Returns the error from
 * raidread_component_label().
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);

	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2339
/* Return a pointer to the in-core component label for column `col'. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2345
/*
 * Write the in-core component label for column `col' back to disk,
 * first stamping it with the array's current mod_counter.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's mod count in step with the label's */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2360
2361 /*
2362 * Swap the label endianness.
2363 *
2364 * Everything in the component label is 4-byte-swapped except the version,
2365 * which is kept in the byte-swapped version at all times, and indicates
2366 * for the writer that a swap is necessary.
2367 *
2368 * For reads it is expected that out_label == clabel, but writes expect
2369 * separate labels so only the re-swapped label is written out to disk,
2370 * leaving the swapped-except-version internally.
2371 *
2372 * Only support swapping label version 2.
2373 */
static void
rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
{
	int *in, *out, *in_last;

	/* only version-2 labels are supported for swapping */
	KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));

	/* Don't swap the label, but do copy it. */
	out_label->version = clabel->version;

	/*
	 * Byte-swap every 32-bit word from serial_number up to (but not
	 * including) &future_use2[42].
	 * NOTE(review): the bound of 42 is tied to the version-2 label
	 * layout of RF_ComponentLabel_t -- confirm if the struct changes.
	 */
	in = &clabel->serial_number;
	in_last = &clabel->future_use2[42];
	out = &out_label->serial_number;

	for (; in < in_last; in++, out++)
		*out = bswap32(*in);
}
2391
/*
 * Read a component label from disk.  If the on-disk label has the
 * opposite endianness (detected via a byte-swapped version field),
 * swap it in place so the in-core copy is in host order except for the
 * version field (see rf_swap_label()).
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	int error;

	error = raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));

	if (error == 0 &&
	    clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
		/* reads swap in place: out_label == clabel is expected */
		rf_swap_label(clabel, clabel);
	}

	return error;
}
2410
/*
 * Read `msize' bytes of component metadata into `data' by reading the
 * `dsize'-byte on-disk area starting at byte offset `offset' on the
 * component device.  Returns 0 on success or an errno.
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		/* copy out only the caller's portion of the area */
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2448
/*
 * Write a component label to disk.  If the in-core label is in foreign
 * endianness (byte-swapped version field), write out a re-swapped local
 * copy so the on-disk label is internally consistent, leaving the
 * in-core swapped-except-version copy untouched.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	RF_ComponentLabel_t *clabel_write = clabel;
	RF_ComponentLabel_t lclabel;
	int error;

	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
		/* swap into a local copy; only the copy goes to disk */
		clabel_write = &lclabel;
		rf_swap_label(clabel, clabel_write);
	}
	error = raidwrite_component_area(dev, b_vp, clabel_write,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));

	return error;
}
2468
2469 /* ARGSUSED */
2470 static int
2471 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2472 size_t msize, daddr_t offset, daddr_t dsize)
2473 {
2474 struct buf *bp;
2475 int error;
2476
2477 /* get a block of the appropriate size... */
2478 bp = geteblk((int)dsize);
2479 bp->b_dev = dev;
2480
2481 /* get our ducks in a row for the write */
2482 bp->b_blkno = offset / DEV_BSIZE;
2483 bp->b_bcount = dsize;
2484 bp->b_flags |= B_WRITE;
2485 bp->b_resid = dsize;
2486
2487 memset(bp->b_data, 0, dsize);
2488 memcpy(bp->b_data, data, msize);
2489
2490 bdev_strategy(bp);
2491 error = biowait(bp);
2492 brelse(bp, 0);
2493 if (error) {
2494 #if 1
2495 printf("Failed to write RAID component info!\n");
2496 #endif
2497 }
2498
2499 return(error);
2500 }
2501
/*
 * Write the on-disk parity map `map' to every live component of the
 * array.  Write errors are currently ignored (see XXX below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
	}
}
2519
/*
 * Read the parity map from every live component: the first live copy is
 * taken verbatim into `map', and subsequent copies are combined into it
 * via rf_paritymap_merge().  Read errors are not checked here.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* first live copy is taken as-is */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2544
/*
 * Bump the array's mod_counter and mark the component label of every
 * usable component -- and of every in-use spare -- dirty on disk.
 * Spare labels are (re)initialized and stamped with the column they
 * stand in for before being marked dirty.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;

		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2605
2606
/*
 * Stamp a new mod_counter into the array and rewrite the component
 * labels of all optimal components and all in-use spares.  When `final'
 * is RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * also marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;

		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2685
2686 void
2687 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2688 {
2689
2690 if (vp != NULL) {
2691 if (auto_configured == 1) {
2692 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2693 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2694 vput(vp);
2695
2696 } else {
2697 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2698 }
2699 }
2700 }
2701
2702
2703 void
2704 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2705 {
2706 int r,c;
2707 struct vnode *vp;
2708 int acd;
2709
2710
2711 /* We take this opportunity to close the vnodes like we should.. */
2712
2713 for (c = 0; c < raidPtr->numCol; c++) {
2714 vp = raidPtr->raid_cinfo[c].ci_vp;
2715 acd = raidPtr->Disks[c].auto_configured;
2716 rf_close_component(raidPtr, vp, acd);
2717 raidPtr->raid_cinfo[c].ci_vp = NULL;
2718 raidPtr->Disks[c].auto_configured = 0;
2719 }
2720
2721 for (r = 0; r < raidPtr->numSpare; r++) {
2722 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2723 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2724 rf_close_component(raidPtr, vp, acd);
2725 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2726 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2727 }
2728 }
2729
2730
/*
 * Kernel thread body: fail component req->col via rf_FailDisk()
 * (initiating reconstruction when RF_FDFLAGS_RECON is set, and forcing
 * it for the duration when RF_FDFLAGS_RECON_FORCE is set).  Frees the
 * request and exits; never returns.
 */
static void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* set forceRecon around the rf_FailDisk() call if requested */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2760
/*
 * Kernel thread body: rewrite the array's parity.  On success, record
 * that parity is good so component labels get their clean bit set at
 * shutdown; on failure, log the error.  Wakes any shutdown waiter and
 * exits; never returns.
 */
static void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2793
2794
/*
 * Kernel thread body: copy reconstructed data back to its original
 * component via rf_CopybackReconstructedData(), then exit; never
 * returns.
 */
static void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2809
2810
/*
 * Kernel thread body: reconstruct component req->col in place (onto the
 * same disk), forcing the reconstruction for the duration when
 * RF_FDFLAGS_RECON_FORCE is set.  Frees the request and exits; never
 * returns.
 */
static void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* set forceRecon around the reconstruction if requested */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_ReconstructInPlace(raidPtr, req->col);

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2838
/*
 * Try to read a RAIDframe component label from (dev, vp).  If the label
 * reads cleanly and looks reasonable, prepend a new RF_AutoConfig_t for
 * it to `ac_list' (keeping the vnode open and the label allocated);
 * otherwise free the label and close the vnode.  Returns the (possibly
 * updated) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_WAITOK);
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2880
/*
 * Scan all disk devices in the system for RAIDframe components and
 * return a list of RF_AutoConfig_t entries for those found.  Wedges
 * are scanned in a first pass, other disks in a second, so that a
 * wedge covering a whole disk wins over that disk's raw partition.
 */
static RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CDs. */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md. */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* we don't care about spiflash */
			if (device_is_a(dv, "spiflash")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares." Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only wedges typed as RAIDframe are candidates */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more. We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Not quite a 'whatever'. In
					 * this situation we know
					 * there is a FS_RAID
					 * partition, but we can't
					 * open it. The most likely
					 * reason is that the
					 * partition is already in
					 * use by another RAID set.
					 * So note that we've already
					 * found a partition on this
					 * disk so we don't attempt
					 * to use the raw disk later. */
					rf_part_found = 1;
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3104
/*
 * Sanity-check an on-disk component label.  Accepts version 1, version
 * 2, and byte-swapped version 2 labels whose geometry fields are
 * self-consistent.  Returns 1 if the label looks valid (after scrubbing
 * stale "Hi" fields via rf_fix_old_label_size() when numsecs is known),
 * 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
	     clabel->version==RF_COMPONENT_LABEL_VERSION ||
	     clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
	    (clabel->clean == RF_RAID_CLEAN ||
	     clabel->clean == RF_RAID_DIRTY) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned. If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3137
3138
3139 /*
3140 * For reasons yet unknown, some old component labels have garbage in
3141 * the newer numBlocksHi region, and this causes lossage. Since those
3142 * disks will also have numsecs set to less than 32 bits of sectors,
3143 * we can determine when this corruption has occurred, and fix it.
3144 *
3145 * The exact same problem, with the same unknown reason, happens to
3146 * the partitionSizeHi member as well.
3147 */
3148 static void
3149 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3150 {
3151
3152 if (numsecs < ((uint64_t)1 << 32)) {
3153 if (clabel->numBlocksHi) {
3154 printf("WARNING: total sectors < 32 bits, yet "
3155 "numBlocksHi set\n"
3156 "WARNING: resetting numBlocksHi to zero.\n");
3157 clabel->numBlocksHi = 0;
3158 }
3159
3160 if (clabel->partitionSizeHi) {
3161 printf("WARNING: total sectors < 32 bits, yet "
3162 "partitionSizeHi set\n"
3163 "WARNING: resetting partitionSizeHi to zero.\n");
3164 clabel->partitionSizeHi = 0;
3165 }
3166 }
3167 }
3168
3169
#ifdef DEBUG
/* Dump the interesting fields of a component label to the console. */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3203
3204 static RF_ConfigSet_t *
3205 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3206 {
3207 RF_AutoConfig_t *ac;
3208 RF_ConfigSet_t *config_sets;
3209 RF_ConfigSet_t *cset;
3210 RF_AutoConfig_t *ac_next;
3211
3212
3213 config_sets = NULL;
3214
3215 /* Go through the AutoConfig list, and figure out which components
3216 belong to what sets. */
3217 ac = ac_list;
3218 while(ac!=NULL) {
3219 /* we're going to putz with ac->next, so save it here
3220 for use at the end of the loop */
3221 ac_next = ac->next;
3222
3223 if (config_sets == NULL) {
3224 /* will need at least this one... */
3225 config_sets = malloc(sizeof(RF_ConfigSet_t),
3226 M_RAIDFRAME, M_WAITOK);
3227 /* this one is easy :) */
3228 config_sets->ac = ac;
3229 config_sets->next = NULL;
3230 config_sets->rootable = 0;
3231 ac->next = NULL;
3232 } else {
3233 /* which set does this component fit into? */
3234 cset = config_sets;
3235 while(cset!=NULL) {
3236 if (rf_does_it_fit(cset, ac)) {
3237 /* looks like it matches... */
3238 ac->next = cset->ac;
3239 cset->ac = ac;
3240 break;
3241 }
3242 cset = cset->next;
3243 }
3244 if (cset==NULL) {
3245 /* didn't find a match above... new set..*/
3246 cset = malloc(sizeof(RF_ConfigSet_t),
3247 M_RAIDFRAME, M_WAITOK);
3248 cset->ac = ac;
3249 ac->next = NULL;
3250 cset->next = config_sets;
3251 cset->rootable = 0;
3252 config_sets = cset;
3253 }
3254 }
3255 ac = ac_next;
3256 }
3257
3258
3259 return(config_sets);
3260 }
3261
3262 static int
3263 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3264 {
3265 RF_ComponentLabel_t *clabel1, *clabel2;
3266
3267 /* If this one matches the *first* one in the set, that's good
3268 enough, since the other members of the set would have been
3269 through here too... */
3270 /* note that we are not checking partitionSize here..
3271
3272 Note that we are also not checking the mod_counters here.
3273 If everything else matches except the mod_counter, that's
3274 good enough for this test. We will deal with the mod_counters
3275 a little later in the autoconfiguration process.
3276
3277 (clabel1->mod_counter == clabel2->mod_counter) &&
3278
3279 The reason we don't check for this is that failed disks
3280 will have lower modification counts. If those disks are
3281 not added to the set they used to belong to, then they will
3282 form their own set, which may result in 2 different sets,
3283 for example, competing to be configured at raid0, and
3284 perhaps competing to be the root filesystem set. If the
3285 wrong ones get configured, or both attempt to become /,
3286 weird behaviour and or serious lossage will occur. Thus we
3287 need to bring them into the fold here, and kick them out at
3288 a later point.
3289
3290 */
3291
3292 clabel1 = cset->ac->clabel;
3293 clabel2 = ac->clabel;
3294 if ((clabel1->version == clabel2->version) &&
3295 (clabel1->serial_number == clabel2->serial_number) &&
3296 (clabel1->num_rows == clabel2->num_rows) &&
3297 (clabel1->num_columns == clabel2->num_columns) &&
3298 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3299 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3300 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3301 (clabel1->parityConfig == clabel2->parityConfig) &&
3302 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3303 (clabel1->blockSize == clabel2->blockSize) &&
3304 rf_component_label_numblocks(clabel1) ==
3305 rf_component_label_numblocks(clabel2) &&
3306 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3307 (clabel1->root_partition == clabel2->root_partition) &&
3308 (clabel1->last_unit == clabel2->last_unit) &&
3309 (clabel1->config_order == clabel2->config_order)) {
3310 /* if it get's here, it almost *has* to be a match */
3311 } else {
3312 /* it's not consistent with somebody in the set..
3313 punt */
3314 return(0);
3315 }
3316 /* all was fine.. it must fit... */
3317 return(1);
3318 }
3319
3320 static int
3321 rf_have_enough_components(RF_ConfigSet_t *cset)
3322 {
3323 RF_AutoConfig_t *ac;
3324 RF_AutoConfig_t *auto_config;
3325 RF_ComponentLabel_t *clabel;
3326 int c;
3327 int num_cols;
3328 int num_missing;
3329 int mod_counter;
3330 int mod_counter_found;
3331 int even_pair_failed;
3332 char parity_type;
3333
3334
3335 /* check to see that we have enough 'live' components
3336 of this set. If so, we can configure it if necessary */
3337
3338 num_cols = cset->ac->clabel->num_columns;
3339 parity_type = cset->ac->clabel->parityConfig;
3340
3341 /* XXX Check for duplicate components!?!?!? */
3342
3343 /* Determine what the mod_counter is supposed to be for this set. */
3344
3345 mod_counter_found = 0;
3346 mod_counter = 0;
3347 ac = cset->ac;
3348 while(ac!=NULL) {
3349 if (mod_counter_found==0) {
3350 mod_counter = ac->clabel->mod_counter;
3351 mod_counter_found = 1;
3352 } else {
3353 if (ac->clabel->mod_counter > mod_counter) {
3354 mod_counter = ac->clabel->mod_counter;
3355 }
3356 }
3357 ac = ac->next;
3358 }
3359
3360 num_missing = 0;
3361 auto_config = cset->ac;
3362
3363 even_pair_failed = 0;
3364 for(c=0; c<num_cols; c++) {
3365 ac = auto_config;
3366 while(ac!=NULL) {
3367 if ((ac->clabel->column == c) &&
3368 (ac->clabel->mod_counter == mod_counter)) {
3369 /* it's this one... */
3370 #ifdef DEBUG
3371 printf("Found: %s at %d\n",
3372 ac->devname,c);
3373 #endif
3374 break;
3375 }
3376 ac=ac->next;
3377 }
3378 if (ac==NULL) {
3379 /* Didn't find one here! */
3380 /* special case for RAID 1, especially
3381 where there are more than 2
3382 components (where RAIDframe treats
3383 things a little differently :( ) */
3384 if (parity_type == '1') {
3385 if (c%2 == 0) { /* even component */
3386 even_pair_failed = 1;
3387 } else { /* odd component. If
3388 we're failed, and
3389 so is the even
3390 component, it's
3391 "Good Night, Charlie" */
3392 if (even_pair_failed == 1) {
3393 return(0);
3394 }
3395 }
3396 } else {
3397 /* normal accounting */
3398 num_missing++;
3399 }
3400 }
3401 if ((parity_type == '1') && (c%2 == 1)) {
3402 /* Just did an even component, and we didn't
3403 bail.. reset the even_pair_failed flag,
3404 and go on to the next component.... */
3405 even_pair_failed = 0;
3406 }
3407 }
3408
3409 clabel = cset->ac->clabel;
3410
3411 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3412 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3413 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3414 /* XXX this needs to be made *much* more general */
3415 /* Too many failures */
3416 return(0);
3417 }
3418 /* otherwise, all is well, and we've got enough to take a kick
3419 at autoconfiguring this set */
3420 return(1);
3421 }
3422
3423 static void
3424 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3425 RF_Raid_t *raidPtr)
3426 {
3427 RF_ComponentLabel_t *clabel;
3428 int i;
3429
3430 clabel = ac->clabel;
3431
3432 /* 1. Fill in the common stuff */
3433 config->numCol = clabel->num_columns;
3434 config->numSpare = 0; /* XXX should this be set here? */
3435 config->sectPerSU = clabel->sectPerSU;
3436 config->SUsPerPU = clabel->SUsPerPU;
3437 config->SUsPerRU = clabel->SUsPerRU;
3438 config->parityConfig = clabel->parityConfig;
3439 /* XXX... */
3440 strcpy(config->diskQueueType,"fifo");
3441 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3442 config->layoutSpecificSize = 0; /* XXX ?? */
3443
3444 while(ac!=NULL) {
3445 /* row/col values will be in range due to the checks
3446 in reasonable_label() */
3447 strcpy(config->devnames[0][ac->clabel->column],
3448 ac->devname);
3449 ac = ac->next;
3450 }
3451
3452 for(i=0;i<RF_MAXDBGV;i++) {
3453 config->debugVars[i][0] = 0;
3454 }
3455 }
3456
3457 static int
3458 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3459 {
3460 RF_ComponentLabel_t *clabel;
3461 int column;
3462 int sparecol;
3463
3464 raidPtr->autoconfigure = new_value;
3465
3466 for(column=0; column<raidPtr->numCol; column++) {
3467 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3468 clabel = raidget_component_label(raidPtr, column);
3469 clabel->autoconfigure = new_value;
3470 raidflush_component_label(raidPtr, column);
3471 }
3472 }
3473 for(column = 0; column < raidPtr->numSpare ; column++) {
3474 sparecol = raidPtr->numCol + column;
3475
3476 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3477 clabel = raidget_component_label(raidPtr, sparecol);
3478 clabel->autoconfigure = new_value;
3479 raidflush_component_label(raidPtr, sparecol);
3480 }
3481 }
3482 return(new_value);
3483 }
3484
3485 static int
3486 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3487 {
3488 RF_ComponentLabel_t *clabel;
3489 int column;
3490 int sparecol;
3491
3492 raidPtr->root_partition = new_value;
3493 for(column=0; column<raidPtr->numCol; column++) {
3494 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3495 clabel = raidget_component_label(raidPtr, column);
3496 clabel->root_partition = new_value;
3497 raidflush_component_label(raidPtr, column);
3498 }
3499 }
3500 for (column = 0; column < raidPtr->numSpare ; column++) {
3501 sparecol = raidPtr->numCol + column;
3502
3503 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3504 clabel = raidget_component_label(raidPtr, sparecol);
3505 clabel->root_partition = new_value;
3506 raidflush_component_label(raidPtr, sparecol);
3507 }
3508 }
3509 return(new_value);
3510 }
3511
3512 static void
3513 rf_release_all_vps(RF_ConfigSet_t *cset)
3514 {
3515 RF_AutoConfig_t *ac;
3516
3517 ac = cset->ac;
3518 while(ac!=NULL) {
3519 /* Close the vp, and give it back */
3520 if (ac->vp) {
3521 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3522 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3523 vput(ac->vp);
3524 ac->vp = NULL;
3525 }
3526 ac = ac->next;
3527 }
3528 }
3529
3530
3531 static void
3532 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3533 {
3534 RF_AutoConfig_t *ac;
3535 RF_AutoConfig_t *next_ac;
3536
3537 ac = cset->ac;
3538 while(ac!=NULL) {
3539 next_ac = ac->next;
3540 /* nuke the label */
3541 free(ac->clabel, M_RAIDFRAME);
3542 /* cleanup the config structure */
3543 free(ac, M_RAIDFRAME);
3544 /* "next.." */
3545 ac = next_ac;
3546 }
3547 /* and, finally, nuke the config set */
3548 free(cset, M_RAIDFRAME);
3549 }
3550
3551
3552 void
3553 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3554 {
3555 /* avoid over-writing byteswapped version. */
3556 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
3557 clabel->version = RF_COMPONENT_LABEL_VERSION;
3558 clabel->serial_number = raidPtr->serial_number;
3559 clabel->mod_counter = raidPtr->mod_counter;
3560
3561 clabel->num_rows = 1;
3562 clabel->num_columns = raidPtr->numCol;
3563 clabel->clean = RF_RAID_DIRTY; /* not clean */
3564 clabel->status = rf_ds_optimal; /* "It's good!" */
3565
3566 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3567 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3568 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3569
3570 clabel->blockSize = raidPtr->bytesPerSector;
3571 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3572
3573 /* XXX not portable */
3574 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3575 clabel->maxOutstanding = raidPtr->maxOutstanding;
3576 clabel->autoconfigure = raidPtr->autoconfigure;
3577 clabel->root_partition = raidPtr->root_partition;
3578 clabel->last_unit = raidPtr->raidid;
3579 clabel->config_order = raidPtr->config_order;
3580
3581 #ifndef RF_NO_PARITY_MAP
3582 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3583 #endif
3584 }
3585
/*
 * Configure the RAID set described by 'cset'.  Picks a unit number
 * (preferring the one recorded in the component labels), builds an
 * RF_Config_t, and runs the normal configuration path.  Returns the
 * softc of the configured set, or NULL on failure.
 */
static struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk forward from the preferred unit until we hit a unit
	   that is either unallocated (sc == NULL) or not yet valid. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* The probe loop ran off the end of the allocated units:
	   allocate a fresh one at raidID. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* Configuration succeeded: attach the disk and mark
		   parity as needing a rewrite check. */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the unit we claimed
		   above so it can be reused. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3657
3658 void
3659 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3660 size_t xmin, size_t xmax)
3661 {
3662
3663 /* Format: raid%d_foo */
3664 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3665
3666 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3667 pool_sethiwat(p, xmax);
3668 pool_prime(p, xmin);
3669 }
3670
3671
3672 /*
3673 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3674 * to see if there is IO pending and if that IO could possibly be done
3675 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3676 * otherwise.
3677 *
3678 */
3679 int
3680 rf_buf_queue_check(RF_Raid_t *raidPtr)
3681 {
3682 struct raid_softc *rs;
3683 struct dk_softc *dksc;
3684
3685 rs = raidPtr->softc;
3686 dksc = &rs->sc_dksc;
3687
3688 if ((rs->sc_flags & RAIDF_INITED) == 0)
3689 return 1;
3690
3691 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3692 /* there is work to do */
3693 return 0;
3694 }
3695 /* default is nothing to do */
3696 return 1;
3697 }
3698
3699 int
3700 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3701 {
3702 uint64_t numsecs;
3703 unsigned secsize;
3704 int error;
3705
3706 error = getdisksize(vp, &numsecs, &secsize);
3707 if (error == 0) {
3708 diskPtr->blockSize = secsize;
3709 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3710 diskPtr->partitionSize = numsecs;
3711 return 0;
3712 }
3713 return error;
3714 }
3715
3716 static int
3717 raid_match(device_t self, cfdata_t cfdata, void *aux)
3718 {
3719 return 1;
3720 }
3721
3722 static void
3723 raid_attach(device_t parent, device_t self, void *aux)
3724 {
3725 }
3726
3727
3728 static int
3729 raid_detach(device_t self, int flags)
3730 {
3731 int error;
3732 struct raid_softc *rs = raidsoftc(self);
3733
3734 if (rs == NULL)
3735 return ENXIO;
3736
3737 if ((error = raidlock(rs)) != 0)
3738 return error;
3739
3740 error = raid_detach_unlocked(rs);
3741
3742 raidunlock(rs);
3743
3744 /* XXX raid can be referenced here */
3745
3746 if (error)
3747 return error;
3748
3749 /* Free the softc */
3750 raidput(rs);
3751
3752 return 0;
3753 }
3754
3755 static void
3756 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3757 {
3758 struct dk_softc *dksc = &rs->sc_dksc;
3759 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3760
3761 memset(dg, 0, sizeof(*dg));
3762
3763 dg->dg_secperunit = raidPtr->totalSectors;
3764 dg->dg_secsize = raidPtr->bytesPerSector;
3765 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3766 dg->dg_ntracks = 4 * raidPtr->numCol;
3767
3768 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3769 }
3770
3771 /*
3772 * Get cache info for all the components (including spares).
3773 * Returns intersection of all the cache flags of all disks, or first
3774 * error if any encountered.
3775 * XXXfua feature flags can change as spares are added - lock down somehow
3776 */
3777 static int
3778 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3779 {
3780 int c;
3781 int error;
3782 int dkwhole = 0, dkpart;
3783
3784 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3785 /*
3786 * Check any non-dead disk, even when currently being
3787 * reconstructed.
3788 */
3789 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
3790 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3791 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3792 if (error) {
3793 if (error != ENODEV) {
3794 printf("raid%d: get cache for component %s failed\n",
3795 raidPtr->raidid,
3796 raidPtr->Disks[c].devname);
3797 }
3798
3799 return error;
3800 }
3801
3802 if (c == 0)
3803 dkwhole = dkpart;
3804 else
3805 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3806 }
3807 }
3808
3809 *data = dkwhole;
3810
3811 return 0;
3812 }
3813
3814 /*
3815 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3816 * We end up returning whatever error was returned by the first cache flush
3817 * that fails.
3818 */
3819
3820 static int
3821 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3822 {
3823 int e = 0;
3824 for (int i = 0; i < 5; i++) {
3825 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3826 &force, FWRITE, NOCRED);
3827 if (!e || e == ENODEV)
3828 return e;
3829 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3830 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3831 }
3832 return e;
3833 }
3834
3835 int
3836 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3837 {
3838 int c, error;
3839
3840 error = 0;
3841 for (c = 0; c < raidPtr->numCol; c++) {
3842 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3843 int e = rf_sync_component_cache(raidPtr, c, force);
3844 if (e && !error)
3845 error = e;
3846 }
3847 }
3848
3849 for (c = 0; c < raidPtr->numSpare ; c++) {
3850 int sparecol = raidPtr->numCol + c;
3851
3852 /* Need to ensure that the reconstruct actually completed! */
3853 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3854 int e = rf_sync_component_cache(raidPtr, sparecol,
3855 force);
3856 if (e && !error)
3857 error = e;
3858 }
3859 }
3860 return error;
3861 }
3862
3863 /* Fill in info with the current status */
3864 void
3865 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3866 {
3867
3868 memset(info, 0, sizeof(*info));
3869
3870 if (raidPtr->status != rf_rs_reconstructing) {
3871 info->total = 100;
3872 info->completed = 100;
3873 } else {
3874 info->total = raidPtr->reconControl->numRUsTotal;
3875 info->completed = raidPtr->reconControl->numRUsComplete;
3876 }
3877 info->remaining = info->total - info->completed;
3878 }
3879
3880 /* Fill in info with the current status */
3881 void
3882 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3883 {
3884
3885 memset(info, 0, sizeof(*info));
3886
3887 if (raidPtr->parity_rewrite_in_progress == 1) {
3888 info->total = raidPtr->Layout.numStripe;
3889 info->completed = raidPtr->parity_rewrite_stripes_done;
3890 } else {
3891 info->completed = 100;
3892 info->total = 100;
3893 }
3894 info->remaining = info->total - info->completed;
3895 }
3896
3897 /* Fill in info with the current status */
3898 void
3899 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3900 {
3901
3902 memset(info, 0, sizeof(*info));
3903
3904 if (raidPtr->copyback_in_progress == 1) {
3905 info->total = raidPtr->Layout.numStripe;
3906 info->completed = raidPtr->copyback_stripes_done;
3907 info->remaining = info->total - info->completed;
3908 } else {
3909 info->remaining = 0;
3910 info->completed = 100;
3911 info->total = 100;
3912 }
3913 }
3914
3915 /* Fill in config with the current info */
3916 int
3917 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3918 {
3919 int d, i, j;
3920
3921 if (!raidPtr->valid)
3922 return ENODEV;
3923 config->cols = raidPtr->numCol;
3924 config->ndevs = raidPtr->numCol;
3925 if (config->ndevs >= RF_MAX_DISKS)
3926 return ENOMEM;
3927 config->nspares = raidPtr->numSpare;
3928 if (config->nspares >= RF_MAX_DISKS)
3929 return ENOMEM;
3930 config->maxqdepth = raidPtr->maxQueueDepth;
3931 d = 0;
3932 for (j = 0; j < config->cols; j++) {
3933 config->devs[d] = raidPtr->Disks[j];
3934 d++;
3935 }
3936 for (i = 0; i < config->nspares; i++) {
3937 config->spares[i] = raidPtr->Disks[raidPtr->numCol + i];
3938 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3939 /* raidctl(8) expects to see this as a used spare */
3940 config->spares[i].status = rf_ds_used_spare;
3941 }
3942 }
3943 return 0;
3944 }
3945
3946 int
3947 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3948 {
3949 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3950 RF_ComponentLabel_t *raid_clabel;
3951 int column = clabel->column;
3952
3953 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3954 return EINVAL;
3955 raid_clabel = raidget_component_label(raidPtr, column);
3956 memcpy(clabel, raid_clabel, sizeof *clabel);
3957 /* Fix-up for userland. */
3958 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
3959 clabel->version = RF_COMPONENT_LABEL_VERSION;
3960
3961 return 0;
3962 }
3963
3964 /*
3965 * Module interface
3966 */
3967
3968 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3969
3970 #ifdef _MODULE
3971 CFDRIVER_DECL(raid, DV_DISK, NULL);
3972 #endif
3973
3974 static int raid_modcmd(modcmd_t, void *);
3975 static int raid_modcmd_init(void);
3976 static int raid_modcmd_fini(void);
3977
3978 static int
3979 raid_modcmd(modcmd_t cmd, void *data)
3980 {
3981 int error;
3982
3983 error = 0;
3984 switch (cmd) {
3985 case MODULE_CMD_INIT:
3986 error = raid_modcmd_init();
3987 break;
3988 case MODULE_CMD_FINI:
3989 error = raid_modcmd_fini();
3990 break;
3991 default:
3992 error = ENOTTY;
3993 break;
3994 }
3995 return error;
3996 }
3997
/*
 * Module initialization: set up locks, attach the device switch and
 * autoconf glue, boot RAIDframe, and register the autoconfiguration
 * finalizer.  Each failure path unwinds exactly the steps completed
 * before it, in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to pick the majors dynamically;
	   EEXIST is tolerated (already attached, e.g. built-in). */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Undo the devsw attach above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Undo the cfdriver and devsw attaches above. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 on every path that reaches here. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: the module still works without
		   autoconfiguration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
4068
/*
 * Module teardown: refuse to unload while any raid unit exists, then
 * detach the autoconf glue and device switch in the reverse order of
 * raid_modcmd_init() and shut RAIDframe down.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: re-attach the cfattach detached above so
		   the module is left in a consistent state. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	devsw_detach(&raid_bdevsw, &raid_cdevsw);
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4109