/* $NetBSD: rf_netbsdkintf.c,v 1.403 2022/03/11 01:59:33 mrg Exp $ */

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 * The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *      @(#)cd.c        8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.403 2022/03/11 01:59:33 mrg Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else /* DEBUG */
#define db1_printf(a) { }
#endif /* DEBUG */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;   /* requests to install a
                                                 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;   /* responses from
                                                 * installation process */
#endif

const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_strategy = raidstrategy,
        .d_ioctl = raidioctl,
        .d_dump = raiddump,
        .d_psize = raidsize,
        .d_discard = nodiscard,
        .d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_read = raidread,
        .d_write = raidwrite,
        .d_ioctl = raidioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
        .d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_strategy = raidstrategy,
        .d_diskstart = raid_diskstart,
        .d_dumpblocks = raid_dumpblocks,
        .d_lastclose = raid_lastclose,
        .d_minphys = minphys
};

#define raidunit(x) DISKUNIT(x)
#define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
        RF_RowCol_t col;
        RF_ReconReqFlags_t flags;
        void *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
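
/*
 * A sketch of raising the limit (this assumes RAIDOUTSTANDING is wired
 * up as a kernel config option; a -D flag in CFLAGS works as well):
 *
 *      options RAIDOUTSTANDING=10
 *
 * Using the worst-case write arithmetic above, 10 outstanding 64K
 * writes could pin roughly 10 * 128K = 1280K of parity/old-data
 * buffers in addition to the incoming data.
 */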

#define RAIDLABELDEV(dev) \
        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static int rf_rescan(void);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
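
/*
 * Example: autoconfiguration is normally enabled in two steps.  The
 * kernel is built with the RAID_AUTOCONFIG option:
 *
 *      options RAID_AUTOCONFIG
 *
 * and the set's component labels are marked autoconfigurable with
 * raidctl -A yes, which is what rf_find_raid_components() keys on
 * at boot.
 */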
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;   /* AllocList */

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
        struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
        sc->sc_unit = unit;
        cv_init(&sc->sc_cv, "raidunit");
        mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
        return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
        cv_destroy(&sc->sc_cv);
        mutex_destroy(&sc->sc_mutex);
        kmem_free(sc, sizeof(*sc));
}

static struct raid_softc *
raidget(int unit, bool create) {
        struct raid_softc *sc;
        if (unit < 0) {
#ifdef DIAGNOSTIC
                panic("%s: unit %d!", __func__, unit);
#endif
                return NULL;
        }
        mutex_enter(&raid_lock);
        LIST_FOREACH(sc, &raids, sc_link) {
                if (sc->sc_unit == unit) {
                        mutex_exit(&raid_lock);
                        return sc;
                }
        }
        mutex_exit(&raid_lock);
        if (!create)
                return NULL;
        sc = raidcreate(unit);
        mutex_enter(&raid_lock);
        LIST_INSERT_HEAD(&raids, sc, sc_link);
        mutex_exit(&raid_lock);
        return sc;
}

static void
raidput(struct raid_softc *sc) {
        mutex_enter(&raid_lock);
        LIST_REMOVE(sc, sc_link);
        mutex_exit(&raid_lock);
        raiddestroy(sc);
}

void
raidattach(int num)
{

        /*
         * Device attachment and associated initialization now occurs
         * as part of the module initialization.
         */
}

static int
rf_autoconfig(device_t self)
{
        RF_AutoConfig_t *ac_list;
        RF_ConfigSet_t *config_sets;

        if (!raidautoconfig || raidautoconfigdone == true)
                return 0;

        /* XXX This code can only be run once. */
        raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
        /*
         * 0. find the boot device if needed first so we can use it later
         * this needs to be done before we autoconfigure any raid sets,
         * because if we use wedges we are not going to be able to open
         * the boot device later
         */
        if (booted_device == NULL)
                cpu_bootconf();
#endif
        /* 1. locate all RAID components on the system */
        aprint_debug("Searching for RAID components...\n");
        ac_list = rf_find_raid_components();

        /* 2. Sort them into their respective sets. */
        config_sets = rf_create_auto_sets(ac_list);

        /*
         * 3. Evaluate each set and configure the valid ones.
         * This gets done in rf_buildroothack().
         */
        rf_buildroothack(config_sets);

        return 1;
}

int
rf_inited(const struct raid_softc *rs) {
        return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
        return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
        return rs->sc_unit;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
        const char *bootname;
        size_t len;

        /* if bdv is NULL, the set can't contain it. exit early. */
        if (bdv == NULL)
                return 0;

        bootname = device_xname(bdv);
        len = strlen(bootname);

        for (int col = 0; col < r->numCol; col++) {
                const char *devname = r->Disks[col].devname;
                devname += sizeof("/dev/") - 1;
                if (strncmp(devname, "dk", 2) == 0) {
                        const char *parent =
                            dkwedge_get_parent_name(r->Disks[col].dev);
                        if (parent != NULL)
                                devname = parent;
                }
                if (strncmp(devname, bootname, len) == 0) {
                        struct raid_softc *sc = r->softc;
                        aprint_debug("raid%d includes boot device %s\n",
                            sc->sc_unit, devname);
                        return 1;
                }
        }
        return 0;
}

static int
rf_rescan(void)
{
        RF_AutoConfig_t *ac_list;
        RF_ConfigSet_t *config_sets, *cset, *next_cset;
        struct raid_softc *sc;
        int raid_added;

        ac_list = rf_find_raid_components();
        config_sets = rf_create_auto_sets(ac_list);

        raid_added = 1;
        while (raid_added > 0) {
                raid_added = 0;
                cset = config_sets;
                while (cset != NULL) {
                        next_cset = cset->next;
                        if (rf_have_enough_components(cset) &&
                            cset->ac->clabel->autoconfigure == 1) {
                                sc = rf_auto_config_set(cset);
                                if (sc != NULL) {
                                        aprint_debug("raid%d: configured ok, rootable %d\n",
                                            sc->sc_unit, cset->rootable);
                                        /* We added one RAID set */
                                        raid_added++;
                                } else {
                                        /* The autoconfig didn't work :( */
                                        aprint_debug("Autoconfig failed\n");
                                        rf_release_all_vps(cset);
                                }
                        } else {
                                /* we're not autoconfiguring this set...
                                   release the associated resources */
                                rf_release_all_vps(cset);
                        }
                        /* cleanup */
                        rf_cleanup_config_set(cset);
                        cset = next_cset;
                }
                if (raid_added > 0) {
                        /* We added at least one RAID set, so re-scan for recursive RAID */
                        ac_list = rf_find_raid_components();
                        config_sets = rf_create_auto_sets(ac_list);
                }
        }

        return 0;
}


static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
        RF_AutoConfig_t *ac_list;
        RF_ConfigSet_t *cset;
        RF_ConfigSet_t *next_cset;
        int num_root;
        int raid_added;
        struct raid_softc *sc, *rsc;
        struct dk_softc *dksc = NULL;   /* XXX gcc -Os: may be used uninit. */

        sc = rsc = NULL;
        num_root = 0;

        raid_added = 1;
        while (raid_added > 0) {
                raid_added = 0;
                cset = config_sets;
                while (cset != NULL) {
                        next_cset = cset->next;
                        if (rf_have_enough_components(cset) &&
                            cset->ac->clabel->autoconfigure == 1) {
                                sc = rf_auto_config_set(cset);
                                if (sc != NULL) {
                                        aprint_debug("raid%d: configured ok, rootable %d\n",
                                            sc->sc_unit, cset->rootable);
                                        /* We added one RAID set */
                                        raid_added++;
                                        if (cset->rootable) {
                                                rsc = sc;
                                                num_root++;
                                        }
                                } else {
                                        /* The autoconfig didn't work :( */
                                        aprint_debug("Autoconfig failed\n");
                                        rf_release_all_vps(cset);
                                }
                        } else {
                                /* we're not autoconfiguring this set...
                                   release the associated resources */
                                rf_release_all_vps(cset);
                        }
                        /* cleanup */
                        rf_cleanup_config_set(cset);
                        cset = next_cset;
                }
                if (raid_added > 0) {
                        /* We added at least one RAID set, so re-scan for recursive RAID */
                        ac_list = rf_find_raid_components();
                        config_sets = rf_create_auto_sets(ac_list);
                }
        }

        /* if the user has specified what the root device should be
           then we don't touch booted_device or boothowto... */

        if (rootspec != NULL) {
                aprint_debug("%s: rootspec %s\n", __func__, rootspec);
                return;
        }

        /* we found something bootable... */

        /*
         * XXX: The following code assumes that the root raid
         * is the first ('a') partition.  This is about the best
         * we can do with a BSD disklabel, but we might be able
         * to do better with a GPT label, by setting a specified
         * attribute to indicate the root partition.  We can then
         * stash the partition number in the r->root_partition
         * high bits (the bottom 2 bits are already used).  For
         * now we just set booted_partition to 0 when we override
         * root.
         */
        if (num_root == 1) {
                device_t candidate_root;
                dksc = &rsc->sc_dksc;
                if (dksc->sc_dkdev.dk_nwedges != 0) {
                        char cname[sizeof(cset->ac->devname)];
                        /* XXX: assume partition 'a' first */
                        snprintf(cname, sizeof(cname), "%s%c",
                            device_xname(dksc->sc_dev), 'a');
                        candidate_root = dkwedge_find_by_wname(cname);
                        aprint_debug("%s: candidate wedge root=%s\n", __func__,
                            cname);
                        if (candidate_root == NULL) {
                                /*
                                 * If that is not found, because we don't use
                                 * disklabel, return the first dk child
                                 * XXX: we can skip the 'a' check above
                                 * and always do this...
                                 */
                                size_t i = 0;
                                candidate_root = dkwedge_find_by_parent(
                                    device_xname(dksc->sc_dev), &i);
                        }
                        aprint_debug("%s: candidate wedge root=%p\n", __func__,
                            candidate_root);
                } else
                        candidate_root = dksc->sc_dev;
                aprint_debug("%s: candidate root=%p booted_device=%p "
                    "root_partition=%d contains_boot=%d\n",
                    __func__, candidate_root, booted_device,
                    rsc->sc_r.root_partition,
                    rf_containsboot(&rsc->sc_r, booted_device));
                /* XXX the check for booted_device == NULL can probably be
                 * dropped, now that rf_containsboot handles that case.
                 */
                if (booted_device == NULL ||
                    rsc->sc_r.root_partition == 1 ||
                    rf_containsboot(&rsc->sc_r, booted_device)) {
                        booted_device = candidate_root;
                        booted_method = "raidframe/single";
                        booted_partition = 0;   /* XXX assume 'a' */
                        aprint_debug("%s: set booted_device=%s(%p)\n", __func__,
                            device_xname(booted_device), booted_device);
                }
        } else if (num_root > 1) {
                aprint_debug("%s: many roots=%d, %p\n", __func__, num_root,
                    booted_device);

                /*
                 * Maybe the MD code can help. If it cannot, then
                 * setroot() will discover that we have no
                 * booted_device and will ask the user if nothing was
                 * hardwired in the kernel config file
                 */
                if (booted_device == NULL)
                        return;

                num_root = 0;
                mutex_enter(&raid_lock);
                LIST_FOREACH(sc, &raids, sc_link) {
                        RF_Raid_t *r = &sc->sc_r;
                        if (r->valid == 0)
                                continue;

                        if (r->root_partition == 0)
                                continue;

                        if (rf_containsboot(r, booted_device)) {
                                num_root++;
                                rsc = sc;
                                dksc = &rsc->sc_dksc;
                        }
                }
                mutex_exit(&raid_lock);

                if (num_root == 1) {
                        booted_device = dksc->sc_dev;
                        booted_method = "raidframe/multi";
                        booted_partition = 0;   /* XXX assume 'a' */
                } else {
                        /* we can't guess.. require the user to answer... */
                        boothowto |= RB_ASKNAME;
                }
        }
}
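
/*
 * For reference, the rootspec short-circuit above corresponds to
 * hardwiring root in the kernel config file, which bypasses all of
 * the candidate-root guessing in rf_buildroothack(), e.g.:
 *
 *      config netbsd root on raid0a type ffs
 *
 * With nothing hardwired and more than one rootable set containing
 * the boot device, setroot() will prompt the user, since boothowto
 * has RB_ASKNAME set above.
 */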

static int
raidsize(dev_t dev)
{
        struct raid_softc *rs;
        struct dk_softc *dksc;
        unsigned int unit;

        unit = raidunit(dev);
        if ((rs = raidget(unit, false)) == NULL)
                return -1;
        dksc = &rs->sc_dksc;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return -1;

        return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
        unsigned int unit;
        struct raid_softc *rs;
        struct dk_softc *dksc;

        unit = raidunit(dev);
        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;
        dksc = &rs->sc_dksc;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return ENODEV;

        /*
           Note that blkno is relative to this particular partition.
           By adding RF_PROTECTED_SECTORS, we get a value that
           is relative to the partition used for the underlying component.
        */
        blkno += RF_PROTECTED_SECTORS;

        return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}
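
/*
 * Worked example of the offset above (assuming the usual
 * RF_PROTECTED_SECTORS of 64): a dump directed at partition-relative
 * block 0 is issued to component block 64, so the component labels
 * stored at the front of each component are never overwritten.
 */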

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
        struct raid_softc *rs = raidsoftc(dev);
        const struct bdevsw *bdev;
        RF_Raid_t *raidPtr;
        int c, sparecol, j, scol, dumpto;
        int error = 0;

        raidPtr = &rs->sc_r;

        /* we only support dumping to RAID 1 sets */
        if (raidPtr->Layout.numDataCol != 1 ||
            raidPtr->Layout.numParityCol != 1)
                return EINVAL;

        if ((error = raidlock(rs)) != 0)
                return error;

        /* figure out what device is alive.. */

        /*
           Look for a component to dump to.  The preference for the
           component to dump to is as follows:
           1) the first component
           2) a used_spare of the first component
           3) the second component
           4) a used_spare of the second component
        */

        dumpto = -1;
        for (c = 0; c < raidPtr->numCol; c++) {
                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                        /* this might be the one */
                        dumpto = c;
                        break;
                }
        }

        /*
           At this point we have possibly selected a live component.
           If we didn't find a live component, we now check to see
           if there is a relevant spared component.
        */

        for (c = 0; c < raidPtr->numSpare; c++) {
                sparecol = raidPtr->numCol + c;
                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                        /* How about this one? */
                        scol = -1;
                        for(j=0;j<raidPtr->numCol;j++) {
                                if (raidPtr->Disks[j].spareCol == sparecol) {
                                        scol = j;
                                        break;
                                }
                        }
                        if (scol == 0) {
                                /*
                                   We must have found a spared first
                                   component!  We'll take that over
                                   anything else found so far.  (We
                                   couldn't have found a real first
                                   component before, since this is a
                                   used spare, and it's saying that
                                   it's replacing the first
                                   component.)  On reboot (with
                                   autoconfiguration turned on)
                                   sparecol will become the first
                                   component (component0) of this set.
                                */
                                dumpto = sparecol;
                                break;
                        } else if (scol != -1) {
                                /*
                                   Must be a spared second component.
                                   We'll dump to that if we haven't found
                                   anything else so far.
                                */
                                if (dumpto == -1)
                                        dumpto = sparecol;
                        }
                }
        }

        if (dumpto == -1) {
                /* we couldn't find any live components to dump to!?!?
                 */
                error = EINVAL;
                goto out;
        }

        bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
        if (bdev == NULL) {
                error = ENXIO;
                goto out;
        }

        error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
            blkno, va, nblk * raidPtr->bytesPerSector);

out:
        raidunlock(rs);

        return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;
        struct dk_softc *dksc;
        int error = 0;
        int part, pmask;

        if ((rs = raidget(unit, true)) == NULL)
                return ENXIO;
        if ((error = raidlock(rs)) != 0)
                return error;

        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
                error = EBUSY;
                goto bad;
        }

        dksc = &rs->sc_dksc;

        part = DISKPART(dev);
        pmask = (1 << part);

        if (!DK_BUSY(dksc, pmask) &&
            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                /* First one... mark things as dirty... Note that we *MUST*
                   have done a configure before this.  I DO NOT WANT TO BE
                   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                   THAT THEY BELONG TOGETHER!!!!! */
                /* XXX should check to see if we're only open for reading
                   here... If so, we needn't do this, but then need some
                   other way of keeping track of what's happened.. */

                rf_markalldirty(&rs->sc_r);
        }

        if ((rs->sc_flags & RAIDF_INITED) != 0)
                error = dk_open(dksc, dev, flags, fmt, l);

bad:
        raidunlock(rs);

        return error;


}

static int
raid_lastclose(device_t self)
{
        struct raid_softc *rs = raidsoftc(self);

        /* Last one... device is not unconfigured yet.
           Device shutdown has taken care of setting the
           clean bits if RAIDF_INITED is not set
           mark things as clean... */

        rf_update_component_labels(&rs->sc_r,
            RF_FINAL_COMPONENT_UPDATE);

        /* pass to unlocked code */
        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                rs->sc_flags |= RAIDF_DETACH;

        return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;
        struct dk_softc *dksc;
        cfdata_t cf;
        int error = 0, do_detach = 0, do_put = 0;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;
        dksc = &rs->sc_dksc;

        if ((error = raidlock(rs)) != 0)
                return error;

        if ((rs->sc_flags & RAIDF_INITED) != 0) {
                error = dk_close(dksc, dev, flags, fmt, l);
                if ((rs->sc_flags & RAIDF_DETACH) != 0)
                        do_detach = 1;
        } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                do_put = 1;

        raidunlock(rs);

        if (do_detach) {
                /* free the pseudo device attach bits */
                cf = device_cfdata(dksc->sc_dev);
                error = config_detach(dksc->sc_dev, 0);
                if (error == 0)
                        free(cf, M_RAIDFRAME);
        } else if (do_put) {
                raidput(rs);
        }

        return error;

}

static void
raid_wakeup(RF_Raid_t *raidPtr)
{
        rf_lock_mutex2(raidPtr->iodone_lock);
        rf_signal_cond2(raidPtr->iodone_cv);
        rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
        unsigned int unit;
        struct raid_softc *rs;
        struct dk_softc *dksc;
        RF_Raid_t *raidPtr;

        unit = raidunit(bp->b_dev);
        if ((rs = raidget(unit, false)) == NULL) {
                bp->b_error = ENXIO;
                goto fail;
        }
        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                bp->b_error = ENXIO;
                goto fail;
        }
        dksc = &rs->sc_dksc;
        raidPtr = &rs->sc_r;

        /* Queue IO only */
        if (dk_strategy_defer(dksc, bp))
                goto done;

        /* schedule the IO to happen at the next convenient time */
        raid_wakeup(raidPtr);

done:
        return;

fail:
        bp->b_resid = bp->b_bcount;
        biodone(bp);
}

static int
raid_diskstart(device_t dev, struct buf *bp)
{
        struct raid_softc *rs = raidsoftc(dev);
        RF_Raid_t *raidPtr;

        raidPtr = &rs->sc_r;
        if (!raidPtr->valid) {
                db1_printf(("raid is not valid..\n"));
                return ENODEV;
        }

        /* XXX */
        bp->b_resid = 0;

        return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
        struct raid_softc *rs;
        struct dk_softc *dksc;

        rs = raidPtr->softc;
        dksc = &rs->sc_dksc;

        dk_done(dksc, bp);

        rf_lock_mutex2(raidPtr->mutex);
        raidPtr->openings++;
        rf_unlock_mutex2(raidPtr->mutex);

        /* schedule more IO */
        raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return ENXIO;

        return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);

}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return ENXIO;

        return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);

}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
        struct dk_softc *dksc = &rs->sc_dksc;
        RF_Raid_t *raidPtr;
        int error;

        raidPtr = &rs->sc_r;

        if (DK_BUSY(dksc, 0) ||
            raidPtr->recon_in_progress != 0 ||
            raidPtr->parity_rewrite_in_progress != 0 ||
            raidPtr->copyback_in_progress != 0)
                return EBUSY;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return 0;

        rs->sc_flags &= ~RAIDF_SHUTDOWN;

        if ((error = rf_Shutdown(raidPtr)) != 0)
                return error;

        rs->sc_flags &= ~RAIDF_INITED;

        /* Kill off any queued buffers */
        dk_drain(dksc);
        bufq_free(dksc->sc_bufq);

        /* Detach the disk. */
        dkwedge_delall(&dksc->sc_dkdev);
        disk_detach(&dksc->sc_dkdev);
        disk_destroy(&dksc->sc_dkdev);
        dk_detach(dksc);

        return 0;
}

static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
        switch (cmd) {
        case RAIDFRAME_ADD_HOT_SPARE:
        case RAIDFRAME_CHECK_COPYBACK_STATUS:
        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
        case RAIDFRAME_CHECK_PARITY:
        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
        case RAIDFRAME_CHECK_RECON_STATUS:
        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
        case RAIDFRAME_COPYBACK:
        case RAIDFRAME_DELETE_COMPONENT:
        case RAIDFRAME_FAIL_DISK:
        case RAIDFRAME_GET_ACCTOTALS:
        case RAIDFRAME_GET_COMPONENT_LABEL:
        case RAIDFRAME_GET_INFO:
        case RAIDFRAME_GET_SIZE:
        case RAIDFRAME_INCORPORATE_HOT_SPARE:
        case RAIDFRAME_INIT_LABELS:
        case RAIDFRAME_KEEP_ACCTOTALS:
        case RAIDFRAME_PARITYMAP_GET_DISABLE:
        case RAIDFRAME_PARITYMAP_SET_DISABLE:
        case RAIDFRAME_PARITYMAP_SET_PARAMS:
        case RAIDFRAME_PARITYMAP_STATUS:
        case RAIDFRAME_REBUILD_IN_PLACE:
        case RAIDFRAME_REMOVE_HOT_SPARE:
        case RAIDFRAME_RESET_ACCTOTALS:
        case RAIDFRAME_REWRITEPARITY:
        case RAIDFRAME_SET_AUTOCONFIG:
        case RAIDFRAME_SET_COMPONENT_LABEL:
        case RAIDFRAME_SET_ROOT:
                return (rs->sc_flags & RAIDF_INITED) == 0;
        }
        return false;
}
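
/*
 * Userland reaches the commands above through ioctl(2) on the raid
 * device.  A sketch (device name and error handling are illustrative;
 * the ioctl names are the real ones from raidframeio.h):
 *
 *      int fd = open("/dev/rraid0d", O_RDWR);  // raw partition; the
 *                                              // letter varies by arch
 *      int percent;
 *      if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS, &percent) == 0)
 *              printf("reconstruction %d%% done\n", percent);
 */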

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
        struct rf_recon_req_internal *rrint;

        if (raidPtr->Layout.map->faultsTolerated == 0) {
                /* Can't do this on a RAID 0!! */
                return EINVAL;
        }

        if (rr->col < 0 || rr->col >= raidPtr->numCol) {
                /* bad column */
                return EINVAL;
        }

        rf_lock_mutex2(raidPtr->mutex);
        if (raidPtr->status == rf_rs_reconstructing) {
                /* you can't fail a disk while we're reconstructing! */
                /* XXX wrong for RAID6 */
                goto out;
        }
        if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
            (raidPtr->numFailures > 0)) {
                /* some other component has failed.  Let's not make
                   things worse. XXX wrong for RAID6 */
                goto out;
        }
        if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
                /* Can't fail a spared disk! */
                goto out;
        }
        rf_unlock_mutex2(raidPtr->mutex);

        /* make a copy of the recon request so that we don't rely on
         * the user's buffer */
        rrint = RF_Malloc(sizeof(*rrint));
        if (rrint == NULL)
                return(ENOMEM);
        rrint->col = rr->col;
        rrint->flags = rr->flags;
        rrint->raidPtr = raidPtr;

        return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
            rrint, "raid_recon");
out:
        rf_unlock_mutex2(raidPtr->mutex);
        return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
        /* allocate a buffer for the layout-specific data, and copy it in */
        if (k_cfg->layoutSpecificSize == 0)
                return 0;

        if (k_cfg->layoutSpecificSize > 10000) {
                /* sanity check */
                return EINVAL;
        }

        u_char *specific_buf;
        specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
        if (specific_buf == NULL)
                return ENOMEM;

        int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
            k_cfg->layoutSpecificSize);
        if (retcode) {
                RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
                return retcode;
        }

        k_cfg->layoutSpecific = specific_buf;
        return 0;
}

static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
        RF_Config_t *u_cfg = *((RF_Config_t **) data);

        if (rs->sc_r.valid) {
                /* There is a valid RAID set running on this unit! */
                printf("raid%d: Device already configured!\n", rs->sc_unit);
                return EINVAL;
        }

        /* copy-in the configuration information */
        /* data points to a pointer to the configuration structure */
        *k_cfg = RF_Malloc(sizeof(**k_cfg));
        if (*k_cfg == NULL) {
                return ENOMEM;
        }
        int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
        if (retcode == 0)
                return 0;
        RF_Free(*k_cfg, sizeof(RF_Config_t));
        db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
        rs->sc_flags |= RAIDF_SHUTDOWN;
        return retcode;
}

int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
        int retcode;
        RF_Raid_t *raidPtr = &rs->sc_r;

        rs->sc_flags &= ~RAIDF_SHUTDOWN;

        if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
                goto out;

        /* should do some kind of sanity check on the configuration.
         * Store the sum of all the bytes in the last byte? */

        /* configure the system */

        /*
         * Clear the entire RAID descriptor, just to make sure
         * there is no stale data left in the case of a
         * reconfiguration
         */
        memset(raidPtr, 0, sizeof(*raidPtr));
        raidPtr->softc = rs;
        raidPtr->raidid = rs->sc_unit;

        retcode = rf_Configure(raidPtr, k_cfg, NULL);

        if (retcode == 0) {
                /* allow this many simultaneous IO's to
                   this RAID device */
                raidPtr->openings = RAIDOUTSTANDING;

                raidinit(rs);
                raid_wakeup(raidPtr);
                rf_markalldirty(raidPtr);
        }

        /* free the buffers.  No return code here. */
        if (k_cfg->layoutSpecificSize) {
                RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
        }
out:
        RF_Free(k_cfg, sizeof(RF_Config_t));
        if (retcode) {
                /*
                 * If configuration failed, set sc_flags so that we
                 * will detach the device when we close it.
                 */
                rs->sc_flags |= RAIDF_SHUTDOWN;
        }
        return retcode;
}

#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

        /* XXX check the label for valid stuff... */
        /* Note that some things *should not* get modified --
           the user should be re-initing the labels instead of
           trying to patch things.
           */
#ifdef DEBUG
        int raidid = raidPtr->raidid;
        printf("raid%d: Got component label:\n", raidid);
        printf("raid%d: Version: %d\n", raidid, clabel->version);
        printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
        printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
        printf("raid%d: Column: %d\n", raidid, clabel->column);
        printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
        printf("raid%d: Clean: %d\n", raidid, clabel->clean);
        printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif /* DEBUG */
        clabel->row = 0;
        int column = clabel->column;

        if ((column < 0) || (column >= raidPtr->numCol)) {
                return(EINVAL);
        }

        /* XXX this isn't allowed to do anything for now :-) */

        /* XXX and before it is, we need to fill in the rest
           of the fields!?!?!?! */
        memcpy(raidget_component_label(raidPtr, column),
            clabel, sizeof(*clabel));
        raidflush_component_label(raidPtr, column);
        return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
        /*
           we only want the serial number from
           the above.  We get all the rest of the information
           from the config that was used to create this RAID
           set.
           */

        raidPtr->serial_number = clabel->serial_number;

        for (int column = 0; column < raidPtr->numCol; column++) {
                RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
                if (RF_DEAD_DISK(diskPtr->status))
                        continue;
                RF_ComponentLabel_t *ci_label = raidget_component_label(
                    raidPtr, column);
                /* Zeroing this is important. */
                memset(ci_label, 0, sizeof(*ci_label));
                raid_init_component_label(raidPtr, ci_label);
                ci_label->serial_number = raidPtr->serial_number;
                ci_label->row = 0; /* we don't pretend to support more */
                rf_component_label_set_partitionsize(ci_label,
                    diskPtr->partitionSize);
                ci_label->column = column;
                raidflush_component_label(raidPtr, column);
                /* XXXjld what about the spares? */
        }

        return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

        if (raidPtr->Layout.map->faultsTolerated == 0) {
                /* Can't do this on a RAID 0!! */
                return EINVAL;
        }

        if (raidPtr->recon_in_progress == 1) {
                /* a reconstruct is already in progress! */
                return EINVAL;
        }

        RF_SingleComponent_t component;
        memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
        component.row = 0; /* we don't support any more */
        int column = component.column;

        if ((column < 0) || (column >= raidPtr->numCol)) {
                return EINVAL;
        }

        rf_lock_mutex2(raidPtr->mutex);
        if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
            (raidPtr->numFailures > 0)) {
                /* XXX 0 above shouldn't be constant!!! */
                /* some component other than this has failed.
                   Let's not make things worse than they already
                   are... */
                printf("raid%d: Unable to reconstruct to disk at:\n",
                    raidPtr->raidid);
                printf("raid%d:     Col: %d   Too many failures.\n",
                    raidPtr->raidid, column);
                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
                printf("raid%d: Unable to reconstruct to disk at:\n",
                    raidPtr->raidid);
                printf("raid%d:     Col: %d   "
                    "Reconstruction already occurring!\n",
                    raidPtr->raidid, column);

                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        if (raidPtr->Disks[column].status == rf_ds_spared) {
                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        rf_unlock_mutex2(raidPtr->mutex);

        struct rf_recon_req_internal *rrint;
        rrint = RF_Malloc(sizeof(*rrint));
        if (rrint == NULL)
                return ENOMEM;

        rrint->col = column;
        rrint->raidPtr = raidPtr;

        return RF_CREATE_THREAD(raidPtr->recon_thread,
            rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
        /*
         * This makes no sense on a RAID 0, or if we are not reconstructing
         * so tell the user it's done.
         */
        if (raidPtr->Layout.map->faultsTolerated == 0 ||
            raidPtr->status != rf_rs_reconstructing) {
                *data = 100;
                return 0;
        }
        if (raidPtr->reconControl->numRUsTotal == 0) {
                *data = 0;
                return 0;
        }
        *data = (raidPtr->reconControl->numRUsComplete * 100
            / raidPtr->reconControl->numRUsTotal);
        return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
        int unit = raidunit(dev);
        int part, pmask;
        struct raid_softc *rs;
        struct dk_softc *dksc;
        RF_Config_t *k_cfg;
        RF_Raid_t *raidPtr;
        RF_AccTotals_t *totals;
        RF_SingleComponent_t component;
        RF_DeviceConfig_t *d_cfg, *ucfgp;
        int retcode = 0;
        int column;
        RF_ComponentLabel_t *clabel;
        RF_SingleComponent_t *sparePtr,*componentPtr;
        int d;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        dksc = &rs->sc_dksc;
        raidPtr = &rs->sc_r;

        db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
            (int) DISKPART(dev), (int) unit, cmd));

        /* Must be initialized for these... */
        if (rf_must_be_initialized(rs, cmd))
                return ENXIO;

        switch (cmd) {
                /* configure the system */
        case RAIDFRAME_CONFIGURE:
                if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
                        return retcode;
                return rf_construct(rs, k_cfg);

                /* shutdown the system */
        case RAIDFRAME_SHUTDOWN:

                part = DISKPART(dev);
                pmask = (1 << part);

                if ((retcode = raidlock(rs)) != 0)
                        return retcode;

                if (DK_BUSY(dksc, pmask) ||
                    raidPtr->recon_in_progress != 0 ||
                    raidPtr->parity_rewrite_in_progress != 0 ||
                    raidPtr->copyback_in_progress != 0)
                        retcode = EBUSY;
                else {
                        /* detach and free on close */
                        rs->sc_flags |= RAIDF_SHUTDOWN;
                        retcode = 0;
                }

                raidunlock(rs);

                return retcode;
        case RAIDFRAME_GET_COMPONENT_LABEL:
                return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
        case RAIDFRAME_SET_COMPONENT_LABEL:
                return rf_set_component_label(raidPtr, data);
#endif

        case RAIDFRAME_INIT_LABELS:
                return rf_init_component_label(raidPtr, data);

        case RAIDFRAME_SET_AUTOCONFIG:
                d = rf_set_autoconfig(raidPtr, *(int *) data);
                printf("raid%d: New autoconfig value is: %d\n",
                    raidPtr->raidid, d);
                *(int *) data = d;
                return retcode;

        case RAIDFRAME_SET_ROOT:
                d = rf_set_rootpartition(raidPtr, *(int *) data);
                printf("raid%d: New rootpartition value is: %d\n",
                    raidPtr->raidid, d);
                *(int *) data = d;
                return retcode;

                /* initialize all parity */
        case RAIDFRAME_REWRITEPARITY:

                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* Parity for RAID 0 is trivially correct */
                        raidPtr->parity_good = RF_RAID_CLEAN;
                        return 0;
                }

                if (raidPtr->parity_rewrite_in_progress == 1) {
                        /* Re-write is already in progress! */
                        return EINVAL;
                }

                return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
                    rf_RewriteParityThread, raidPtr,"raid_parity");

        case RAIDFRAME_ADD_HOT_SPARE:
                sparePtr = (RF_SingleComponent_t *) data;
                memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
                return rf_add_hot_spare(raidPtr, &component);

        case RAIDFRAME_REMOVE_HOT_SPARE:
                return retcode;

        case RAIDFRAME_DELETE_COMPONENT:
                componentPtr = (RF_SingleComponent_t *)data;
                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                return rf_delete_component(raidPtr, &component);

        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                componentPtr = (RF_SingleComponent_t *)data;
                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                return rf_incorporate_hot_spare(raidPtr, &component);

        case RAIDFRAME_REBUILD_IN_PLACE:
                return rf_rebuild_in_place(raidPtr, data);

        case RAIDFRAME_GET_INFO:
                ucfgp = *(RF_DeviceConfig_t **)data;
                d_cfg = RF_Malloc(sizeof(*d_cfg));
                if (d_cfg == NULL)
                        return ENOMEM;
                retcode = rf_get_info(raidPtr, d_cfg);
                if (retcode == 0) {
                        retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
                }
                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                return retcode;

        case RAIDFRAME_CHECK_PARITY:
                *(int *) data = raidPtr->parity_good;
                return 0;

        case RAIDFRAME_PARITYMAP_STATUS:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                rf_paritymap_status(raidPtr->parity_map, data);
                return 0;

        case RAIDFRAME_PARITYMAP_SET_PARAMS:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                if (raidPtr->parity_map == NULL)
                        return ENOENT; /* ??? */
                if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
                        return EINVAL;
                return 0;

        case RAIDFRAME_PARITYMAP_GET_DISABLE:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                *(int *) data = rf_paritymap_get_disable(raidPtr);
                return 0;

        case RAIDFRAME_PARITYMAP_SET_DISABLE:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                rf_paritymap_set_disable(raidPtr, *(int *)data);
                /* XXX should errors be passed up? */
                return 0;

        case RAIDFRAME_RESCAN:
                return rf_rescan();

        case RAIDFRAME_RESET_ACCTOTALS:
                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
                return 0;

        case RAIDFRAME_GET_ACCTOTALS:
                totals = (RF_AccTotals_t *) data;
                *totals = raidPtr->acc_totals;
                return 0;

        case RAIDFRAME_KEEP_ACCTOTALS:
                raidPtr->keep_acc_totals = *(int *)data;
                return 0;

        case RAIDFRAME_GET_SIZE:
                *(int *) data = raidPtr->totalSectors;
                return 0;

        case RAIDFRAME_FAIL_DISK:
                return rf_fail_disk(raidPtr, data);

                /* invoke a copyback operation after recon on whatever disk
                 * needs it, if any */
        case RAIDFRAME_COPYBACK:

                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0!! */
                        return EINVAL;
                }

                if (raidPtr->copyback_in_progress == 1) {
                        /* Copyback is already in progress! */
                        return EINVAL;
                }

                return RF_CREATE_THREAD(raidPtr->copyback_thread,
                    rf_CopybackThread, raidPtr, "raid_copyback");

                /* return the percentage completion of reconstruction */
        case RAIDFRAME_CHECK_RECON_STATUS:
                return rf_check_recon_status(raidPtr, data);

        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                rf_check_recon_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0, so tell the
                           user it's done. */
                        *(int *) data = 100;
                        return 0;
                }
                if (raidPtr->parity_rewrite_in_progress == 1) {
                        *(int *) data = 100 *
                            raidPtr->parity_rewrite_stripes_done /
                            raidPtr->Layout.numStripe;
                } else {
                        *(int *) data = 100;
                }
                return 0;

        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                rf_check_parityrewrite_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_CHECK_COPYBACK_STATUS:
                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0 */
                        *(int *) data = 100;
                        return 0;
                }
                if (raidPtr->copyback_in_progress == 1) {
                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                            raidPtr->Layout.numStripe;
                } else {
                        *(int *) data = 100;
                }
                return 0;

        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
                rf_check_copyback_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_SET_LAST_UNIT:
                for (column = 0; column < raidPtr->numCol; column++)
                        if (raidPtr->Disks[column].status != rf_ds_optimal)
                                return EBUSY;

                for (column = 0; column < raidPtr->numCol; column++) {
                        clabel = raidget_component_label(raidPtr, column);
                        clabel->last_unit = *(int *)data;
                        raidflush_component_label(raidPtr, column);
                }
                rs->sc_cflags |= RAIDF_UNIT_CHANGED;
                return 0;

                /* the sparetable daemon calls this to wait for the kernel to
                 * need a spare table. this ioctl does not return until a
                 * spare table is needed. XXX -- calling mpsleep here in the
                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                 * -- I should either compute the spare table in the kernel,
                 * or have a different -- XXX XXX -- interface (a different
                 * character device) for delivering the table -- XXX */
#if RF_DISABLED
        case RAIDFRAME_SPARET_WAIT:
                rf_lock_mutex2(rf_sparet_wait_mutex);
                while (!rf_sparet_wait_queue)
                        rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
                RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                rf_unlock_mutex2(rf_sparet_wait_mutex);

                /* structure assignment */
                *((RF_SparetWait_t *) data) = *waitreq;

                RF_Free(waitreq, sizeof(*waitreq));
                return 0;

                /* wakes up a process waiting on SPARET_WAIT and puts an error
                 * code in it that will cause the daemon to exit */
        case RAIDFRAME_ABORT_SPARET_WAIT:
                waitreq = RF_Malloc(sizeof(*waitreq));
                waitreq->fcol = -1;
                rf_lock_mutex2(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_wait_queue;
                rf_sparet_wait_queue = waitreq;
                rf_broadcast_cond2(rf_sparet_wait_cv);
                rf_unlock_mutex2(rf_sparet_wait_mutex);
                return 0;

                /* used by the spare table daemon to deliver a spare table
                 * into the kernel */
        case RAIDFRAME_SEND_SPARET:

                /* install the spare table */
                retcode = rf_SetSpareTable(raidPtr, *(void **) data);

                /* respond to the requestor.  the return status of the spare
                 * table installation is passed in the "fcol" field */
                waitreq = RF_Malloc(sizeof(*waitreq));
                waitreq->fcol = retcode;
                rf_lock_mutex2(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_resp_queue;
                rf_sparet_resp_queue = waitreq;
                rf_broadcast_cond2(rf_sparet_resp_cv);
                rf_unlock_mutex2(rf_sparet_wait_mutex);

                return retcode;
#endif
        default:
                /*
                 * Don't bother trying to load compat modules
                 * if it is not our ioctl. This is more efficient
                 * and makes rump tests not depend on compat code
                 */
                if (IOCGROUP(cmd) != 'r')
                        break;
#ifdef _LP64
                if ((l->l_proc->p_flag & PK_32) != 0) {
                        module_autoload("compat_netbsd32_raid",
                            MODULE_CLASS_EXEC);
                        MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
                            (rs, cmd, data), enosys(), retcode);
                        if (retcode != EPASSTHROUGH)
                                return retcode;
                }
#endif
                module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
                MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
                    (rs, cmd, data), enosys(), retcode);
                if (retcode != EPASSTHROUGH)
                        return retcode;

                module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
                MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
                    (rs, cmd, data), enosys(), retcode);
                if (retcode != EPASSTHROUGH)
                        return retcode;
                break; /* fall through to the os-specific code below */

        }

        if (!raidPtr->valid)
                return EINVAL;

        /*
         * Add support for "regular" device ioctls here.
         */

        switch (cmd) {
        case DIOCGCACHE:
                retcode = rf_get_component_caches(raidPtr, (int *)data);
                break;

        case DIOCCACHESYNC:
                retcode = rf_sync_component_caches(raidPtr, *(int *)data);
                break;

        default:
                retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
                break;
        }

        return retcode;

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(struct raid_softc *rs)
{
        cfdata_t cf;
        unsigned int unit;
        struct dk_softc *dksc = &rs->sc_dksc;
        RF_Raid_t *raidPtr = &rs->sc_r;
        device_t dev;

        unit = raidPtr->raidid;

        /* XXX doesn't check bounds. */
        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

        /* attach the pseudo device */
        cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
        cf->cf_name = raid_cd.cd_name;
        cf->cf_atname = raid_cd.cd_name;
        cf->cf_unit = unit;
        cf->cf_fstate = FSTATE_STAR;

        dev = config_attach_pseudo(cf);
        if (dev == NULL) {
                printf("raid%d: config_attach_pseudo failed\n",
                    raidPtr->raidid);
                free(cf, M_RAIDFRAME);
                return;
        }

        /* provide a backpointer to the real softc */
        raidsoftc(dev) = rs;

        /* disk_attach actually creates space for the CPU disklabel, among
         * other things, so it's critical to call this *BEFORE* we try putzing
         * with disklabels. */
        dk_init(dksc, dev, DKTYPE_RAID);
        disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

        /* XXX There may be a weird interaction here between this, and
         * protectedSectors, as used in RAIDframe.  */

        rs->sc_size = raidPtr->totalSectors;

        /* Attach dk and disk subsystems */
        dk_attach(dksc);
        disk_attach(&dksc->sc_dkdev);
        rf_set_geometry(rs, raidPtr);

        bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

        /* mark unit as usable */
        rs->sc_flags |= RAIDF_INITED;

        dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1899 * XXX
1900 *
1901 * XXX This code is not currently used. GO
1902 */
1903 int
1904 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1905 {
1906 int retcode;
1907
1908 rf_lock_mutex2(rf_sparet_wait_mutex);
1909 req->next = rf_sparet_wait_queue;
1910 rf_sparet_wait_queue = req;
1911 rf_broadcast_cond2(rf_sparet_wait_cv);
1912
1913 /* mpsleep unlocks the mutex */
1914 while (!rf_sparet_resp_queue) {
1915 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1916 }
1917 req = rf_sparet_resp_queue;
1918 rf_sparet_resp_queue = req->next;
1919 rf_unlock_mutex2(rf_sparet_wait_mutex);
1920
1921 retcode = req->fcol;
1922 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1923 * alloc'd */
1924 return retcode;
1925 }
1926 #endif
1927
1928 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1929 * bp & passes it down.
1930  * Any calls originating in the kernel must use non-blocking I/O.  We do
1931  * some extra sanity checking to return "appropriate" error values for
1932  * certain conditions (to make some standard utilities work).
1933 *
1934 * Formerly known as: rf_DoAccessKernel
1935 */
1936 void
1937 raidstart(RF_Raid_t *raidPtr)
1938 {
1939 struct raid_softc *rs;
1940 struct dk_softc *dksc;
1941
1942 rs = raidPtr->softc;
1943 dksc = &rs->sc_dksc;
1944 /* quick check to see if anything has died recently */
1945 rf_lock_mutex2(raidPtr->mutex);
1946 if (raidPtr->numNewFailures > 0) {
1947 rf_unlock_mutex2(raidPtr->mutex);
1948 rf_update_component_labels(raidPtr,
1949 RF_NORMAL_COMPONENT_UPDATE);
1950 rf_lock_mutex2(raidPtr->mutex);
1951 raidPtr->numNewFailures--;
1952 }
1953 rf_unlock_mutex2(raidPtr->mutex);
1954
1955 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1956 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1957 return;
1958 }
1959
1960 dk_start(dksc, NULL);
1961 }
1962
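/*
 * raiddoaccess -- sanity-check a buf against the set geometry and, if it
 * fits, dispatch it via rf_DoAccess() as non-blocking I/O.  Returns
 * EAGAIN when no openings are available, and ENOSPC when the request
 * runs past the end of the set or is not a whole number of sectors.
 */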
1963 static int
1964 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1965 {
1966 RF_SectorCount_t num_blocks, pb, sum;
1967 RF_RaidAddr_t raid_addr;
1968 daddr_t blocknum;
1969 int rc;
1970
1971 rf_lock_mutex2(raidPtr->mutex);
1972 if (raidPtr->openings == 0) {
1973 rf_unlock_mutex2(raidPtr->mutex);
1974 return EAGAIN;
1975 }
1976 rf_unlock_mutex2(raidPtr->mutex);
1977
1978 blocknum = bp->b_rawblkno;
1979
1980 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1981 (int) blocknum));
1982
1983 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1984 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1985
1986 /* *THIS* is where we adjust what block we're going to...
1987 * but DO NOT TOUCH bp->b_blkno!!! */
1988 raid_addr = blocknum;
1989
1990 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1991 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1992 sum = raid_addr + num_blocks + pb;
1993 	if (rf_debugKernelAccess) {
1994 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1995 (int) raid_addr, (int) sum, (int) num_blocks,
1996 (int) pb, (int) bp->b_resid));
1997 }
1998 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1999 || (sum < num_blocks) || (sum < pb)) {
2000 rc = ENOSPC;
2001 goto done;
2002 }
2003 /*
2004 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2005 */
2006
2007 if (bp->b_bcount & raidPtr->sectorMask) {
2008 rc = ENOSPC;
2009 goto done;
2010 }
2011 db1_printf(("Calling DoAccess..\n"));
2012
2013
2014 rf_lock_mutex2(raidPtr->mutex);
2015 raidPtr->openings--;
2016 rf_unlock_mutex2(raidPtr->mutex);
2017
2018 /* don't ever condition on bp->b_flags & B_WRITE.
2019 * always condition on B_READ instead */
2020
2021 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2022 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2023 raid_addr, num_blocks,
2024 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2025
2026 done:
2027 return rc;
2028 }
2029
2030 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2031
2032 int
2033 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2034 {
2035 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2036 struct buf *bp;
2037
2038 req->queue = queue;
2039 bp = req->bp;
2040
2041 switch (req->type) {
2042 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2043 /* XXX need to do something extra here.. */
2044 /* I'm leaving this in, as I've never actually seen it used,
2045 * and I'd like folks to report it... GO */
2046 printf("%s: WAKEUP CALLED\n", __func__);
2047 queue->numOutstanding++;
2048
2049 bp->b_flags = 0;
2050 bp->b_private = req;
2051
2052 KernelWakeupFunc(bp);
2053 break;
2054
2055 case RF_IO_TYPE_READ:
2056 case RF_IO_TYPE_WRITE:
2057 #if RF_ACC_TRACE > 0
2058 if (req->tracerec) {
2059 RF_ETIMER_START(req->tracerec->timer);
2060 }
2061 #endif
2062 InitBP(bp, queue->rf_cinfo->ci_vp,
2063 op, queue->rf_cinfo->ci_dev,
2064 req->sectorOffset, req->numSector,
2065 req->buf, KernelWakeupFunc, (void *) req,
2066 queue->raidPtr->logBytesPerSector);
2067
2068 if (rf_debugKernelAccess) {
2069 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2070 (long) bp->b_blkno));
2071 }
2072 queue->numOutstanding++;
2073 queue->last_deq_sector = req->sectorOffset;
2074 /* acc wouldn't have been let in if there were any pending
2075 * reqs at any other priority */
2076 queue->curPriority = req->priority;
2077
2078 db1_printf(("Going for %c to unit %d col %d\n",
2079 req->type, queue->raidPtr->raidid,
2080 queue->col));
2081 db1_printf(("sector %d count %d (%d bytes) %d\n",
2082 (int) req->sectorOffset, (int) req->numSector,
2083 (int) (req->numSector <<
2084 queue->raidPtr->logBytesPerSector),
2085 (int) queue->raidPtr->logBytesPerSector));
2086
2087 /*
2088 * XXX: drop lock here since this can block at
2089 * least with backing SCSI devices. Retake it
2090 * to minimize fuss with calling interfaces.
2091 */
2092
2093 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2094 bdev_strategy(bp);
2095 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2096 break;
2097
2098 default:
2099 panic("bad req->type in rf_DispatchKernelIO");
2100 }
2101 db1_printf(("Exiting from DispatchKernelIO\n"));
2102
2103 return 0;
2104 }
2105 /* This is the b_iodone callback for I/O initiated from kernel code:
2106    it records any error (possibly marking the component as failed) and
2107    hands the request to the raidio thread via the iodone queue. */
2108 static void
2109 KernelWakeupFunc(struct buf *bp)
2110 {
2111 RF_DiskQueueData_t *req = NULL;
2112 RF_DiskQueue_t *queue;
2113
2114 db1_printf(("recovering the request queue:\n"));
2115
2116 req = bp->b_private;
2117
2118 queue = (RF_DiskQueue_t *) req->queue;
2119
2120 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2121
2122 #if RF_ACC_TRACE > 0
2123 if (req->tracerec) {
2124 RF_ETIMER_STOP(req->tracerec->timer);
2125 RF_ETIMER_EVAL(req->tracerec->timer);
2126 rf_lock_mutex2(rf_tracing_mutex);
2127 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2128 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2129 req->tracerec->num_phys_ios++;
2130 rf_unlock_mutex2(rf_tracing_mutex);
2131 }
2132 #endif
2133
2134 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2135 * ballistic, and mark the component as hosed... */
2136
2137 if (bp->b_error != 0) {
2138 /* Mark the disk as dead */
2139 /* but only mark it once... */
2140 /* and only if it wouldn't leave this RAID set
2141 completely broken */
2142 if (((queue->raidPtr->Disks[queue->col].status ==
2143 rf_ds_optimal) ||
2144 (queue->raidPtr->Disks[queue->col].status ==
2145 rf_ds_used_spare)) &&
2146 (queue->raidPtr->numFailures <
2147 queue->raidPtr->Layout.map->faultsTolerated)) {
2148 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2149 queue->raidPtr->raidid,
2150 bp->b_error,
2151 queue->raidPtr->Disks[queue->col].devname);
2152 queue->raidPtr->Disks[queue->col].status =
2153 rf_ds_failed;
2154 queue->raidPtr->status = rf_rs_degraded;
2155 queue->raidPtr->numFailures++;
2156 queue->raidPtr->numNewFailures++;
2157 } else { /* Disk is already dead... */
2158 /* printf("Disk already marked as dead!\n"); */
2159 }
2160
2161 }
2162
2163 /* Fill in the error value */
2164 req->error = bp->b_error;
2165
2166 /* Drop this one on the "finished" queue... */
2167 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2168
2169 /* Let the raidio thread know there is work to be done. */
2170 rf_signal_cond2(queue->raidPtr->iodone_cv);
2171
2172 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2173 }
2174
2175
2176 /*
2177 * initialize a buf structure for doing an I/O in the kernel.
2178 */
2179 static void
2180 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2181 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2182 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2183 {
2184 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2185 bp->b_oflags = 0;
2186 bp->b_cflags = 0;
2187 bp->b_bcount = numSect << logBytesPerSector;
2188 bp->b_bufsize = bp->b_bcount;
2189 bp->b_error = 0;
2190 bp->b_dev = dev;
2191 bp->b_data = bf;
2192 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2193 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2194 if (bp->b_bcount == 0) {
2195 panic("bp->b_bcount is zero in InitBP!!");
2196 }
2197 bp->b_iodone = cbFunc;
2198 bp->b_private = cbArg;
2199 }
2200
2201 /*
2202 * Wait interruptibly for an exclusive lock.
2203 *
2204 * XXX
2205 * Several drivers do this; it should be abstracted and made MP-safe.
2206 * (Hmm... where have we seen this warning before :-> GO )
2207 */
2208 static int
2209 raidlock(struct raid_softc *rs)
2210 {
2211 int error;
2212
2213 error = 0;
2214 mutex_enter(&rs->sc_mutex);
2215 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2216 rs->sc_flags |= RAIDF_WANTED;
2217 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2218 if (error != 0)
2219 goto done;
2220 }
2221 rs->sc_flags |= RAIDF_LOCKED;
2222 done:
2223 mutex_exit(&rs->sc_mutex);
2224 return error;
2225 }
2226 /*
2227 * Unlock and wake up any waiters.
2228 */
2229 static void
2230 raidunlock(struct raid_softc *rs)
2231 {
2232
2233 mutex_enter(&rs->sc_mutex);
2234 rs->sc_flags &= ~RAIDF_LOCKED;
2235 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2236 rs->sc_flags &= ~RAIDF_WANTED;
2237 cv_broadcast(&rs->sc_cv);
2238 }
2239 mutex_exit(&rs->sc_mutex);
2240 }
2241
2242
2243 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2244 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2245 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2246
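/*
 * On-disk metadata layout: the component label lives at a fixed byte
 * offset from the start of each component.  The reserved label area is
 * at least RF_COMPONENT_INFO_SIZE bytes, but grows to a full sector on
 * devices with larger sectors; the parity map region follows
 * immediately after it.
 */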
2247 static daddr_t
2248 rf_component_info_offset(void)
2249 {
2250
2251 return RF_COMPONENT_INFO_OFFSET;
2252 }
2253
2254 static daddr_t
2255 rf_component_info_size(unsigned secsize)
2256 {
2257 daddr_t info_size;
2258
2259 KASSERT(secsize);
2260 if (secsize > RF_COMPONENT_INFO_SIZE)
2261 info_size = secsize;
2262 else
2263 info_size = RF_COMPONENT_INFO_SIZE;
2264
2265 return info_size;
2266 }
2267
2268 static daddr_t
2269 rf_parity_map_offset(RF_Raid_t *raidPtr)
2270 {
2271 daddr_t map_offset;
2272
2273 KASSERT(raidPtr->bytesPerSector);
2274 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2275 map_offset = raidPtr->bytesPerSector;
2276 else
2277 map_offset = RF_COMPONENT_INFO_SIZE;
2278 map_offset += rf_component_info_offset();
2279
2280 return map_offset;
2281 }
2282
2283 static daddr_t
2284 rf_parity_map_size(RF_Raid_t *raidPtr)
2285 {
2286 daddr_t map_size;
2287
2288 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2289 map_size = raidPtr->bytesPerSector;
2290 else
2291 map_size = RF_PARITY_MAP_SIZE;
2292
2293 return map_size;
2294 }
2295
2296 int
2297 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2298 {
2299 RF_ComponentLabel_t *clabel;
2300
2301 clabel = raidget_component_label(raidPtr, col);
2302 clabel->clean = RF_RAID_CLEAN;
2303 raidflush_component_label(raidPtr, col);
2304 return(0);
2305 }
2306
2307
2308 int
2309 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2310 {
2311 RF_ComponentLabel_t *clabel;
2312
2313 clabel = raidget_component_label(raidPtr, col);
2314 clabel->clean = RF_RAID_DIRTY;
2315 raidflush_component_label(raidPtr, col);
2316 return(0);
2317 }
2318
2319 int
2320 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2321 {
2322 KASSERT(raidPtr->bytesPerSector);
2323
2324 return raidread_component_label(raidPtr->bytesPerSector,
2325 raidPtr->Disks[col].dev,
2326 raidPtr->raid_cinfo[col].ci_vp,
2327 &raidPtr->raid_cinfo[col].ci_label);
2328 }
2329
2330 RF_ComponentLabel_t *
2331 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2332 {
2333 return &raidPtr->raid_cinfo[col].ci_label;
2334 }
2335
2336 int
2337 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2338 {
2339 RF_ComponentLabel_t *label;
2340
2341 label = &raidPtr->raid_cinfo[col].ci_label;
2342 label->mod_counter = raidPtr->mod_counter;
2343 #ifndef RF_NO_PARITY_MAP
2344 label->parity_map_modcount = label->mod_counter;
2345 #endif
2346 return raidwrite_component_label(raidPtr->bytesPerSector,
2347 raidPtr->Disks[col].dev,
2348 raidPtr->raid_cinfo[col].ci_vp, label);
2349 }
2350
2351 /*
2352 * Swap the label endianness.
2353 *
2354  * Every field in the component label is 4-byte-swapped except the
2355  * version, which is kept byte-swapped at all times and tells the
2356  * writer that a swap is necessary.
2357  *
2358  * For reads it is expected that out_label == clabel, but writes expect
2359  * separate labels, so only the re-swapped copy is written out to disk,
2360  * leaving the swapped-except-version label intact in memory.
2361 *
2362 * Only support swapping label version 2.
2363 */
2364 static void
2365 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
2366 {
2367 int *in, *out, *in_last;
2368
2369 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
2370
2371 /* Don't swap the label, but do copy it. */
2372 out_label->version = clabel->version;
2373
2374 in = &clabel->serial_number;
2375 in_last = &clabel->future_use2[42];
2376 out = &out_label->serial_number;
2377
2378 for (; in < in_last; in++, out++)
2379 *out = bswap32(*in);
2380 }
2381
2382 static int
2383 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2384 RF_ComponentLabel_t *clabel)
2385 {
2386 int error;
2387
2388 error = raidread_component_area(dev, b_vp, clabel,
2389 sizeof(RF_ComponentLabel_t),
2390 rf_component_info_offset(),
2391 rf_component_info_size(secsize));
2392
2393 if (error == 0 &&
2394 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2395 rf_swap_label(clabel, clabel);
2396 }
2397
2398 return error;
2399 }
2400
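/*
 * raidread_component_area -- read 'dsize' bytes of component metadata
 * from byte 'offset' on the raw component and copy the first 'msize'
 * bytes into 'data'.
 */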
2401 /* ARGSUSED */
2402 static int
2403 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2404 size_t msize, daddr_t offset, daddr_t dsize)
2405 {
2406 struct buf *bp;
2407 int error;
2408
2409 /* XXX should probably ensure that we don't try to do this if
2410 someone has changed rf_protected_sectors. */
2411
2412 if (b_vp == NULL) {
2413 /* For whatever reason, this component is not valid.
2414 Don't try to read a component label from it. */
2415 return(EINVAL);
2416 }
2417
2418 /* get a block of the appropriate size... */
2419 bp = geteblk((int)dsize);
2420 bp->b_dev = dev;
2421
2422 /* get our ducks in a row for the read */
2423 bp->b_blkno = offset / DEV_BSIZE;
2424 bp->b_bcount = dsize;
2425 bp->b_flags |= B_READ;
2426 bp->b_resid = dsize;
2427
2428 bdev_strategy(bp);
2429 error = biowait(bp);
2430
2431 if (!error) {
2432 memcpy(data, bp->b_data, msize);
2433 }
2434
2435 brelse(bp, 0);
2436 return(error);
2437 }
2438
2439 static int
2440 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2441 RF_ComponentLabel_t *clabel)
2442 {
2443 RF_ComponentLabel_t *clabel_write = clabel;
2444 RF_ComponentLabel_t lclabel;
2445 int error;
2446
2447 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2448 clabel_write = &lclabel;
2449 rf_swap_label(clabel, clabel_write);
2450 }
2451 error = raidwrite_component_area(dev, b_vp, clabel_write,
2452 sizeof(RF_ComponentLabel_t),
2453 rf_component_info_offset(),
2454 rf_component_info_size(secsize), 0);
2455
2456 return error;
2457 }
2458
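/*
 * raidwrite_component_area -- zero-pad 'data' ('msize' bytes) out to
 * 'dsize' and write it at byte 'offset' on the raw component; if
 * 'asyncp' is set, the write is issued B_ASYNC and not waited for.
 */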
2459 /* ARGSUSED */
2460 static int
2461 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2462 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2463 {
2464 struct buf *bp;
2465 int error;
2466
2467 /* get a block of the appropriate size... */
2468 bp = geteblk((int)dsize);
2469 bp->b_dev = dev;
2470
2471 /* get our ducks in a row for the write */
2472 bp->b_blkno = offset / DEV_BSIZE;
2473 bp->b_bcount = dsize;
2474 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2475 bp->b_resid = dsize;
2476
2477 memset(bp->b_data, 0, dsize);
2478 memcpy(bp->b_data, data, msize);
2479
2480 bdev_strategy(bp);
2481 if (asyncp)
2482 return 0;
2483 error = biowait(bp);
2484 brelse(bp, 0);
2485 if (error) {
2486 #if 1
2487 printf("Failed to write RAID component info!\n");
2488 #endif
2489 }
2490
2491 return(error);
2492 }
2493
2494 void
2495 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2496 {
2497 int c;
2498
2499 for (c = 0; c < raidPtr->numCol; c++) {
2500 /* Skip dead disks. */
2501 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2502 continue;
2503 /* XXXjld: what if an error occurs here? */
2504 raidwrite_component_area(raidPtr->Disks[c].dev,
2505 raidPtr->raid_cinfo[c].ci_vp, map,
2506 RF_PARITYMAP_NBYTE,
2507 rf_parity_map_offset(raidPtr),
2508 rf_parity_map_size(raidPtr), 0);
2509 }
2510 }
2511
2512 void
2513 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2514 {
2515 struct rf_paritymap_ondisk tmp;
2516 	int c, first;
2517 
2518 	first = 1;
2519 for (c = 0; c < raidPtr->numCol; c++) {
2520 /* Skip dead disks. */
2521 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2522 continue;
2523 raidread_component_area(raidPtr->Disks[c].dev,
2524 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2525 RF_PARITYMAP_NBYTE,
2526 rf_parity_map_offset(raidPtr),
2527 rf_parity_map_size(raidPtr));
2528 if (first) {
2529 memcpy(map, &tmp, sizeof(*map));
2530 first = 0;
2531 } else {
2532 rf_paritymap_merge(map, &tmp);
2533 }
2534 }
2535 }
2536
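/*
 * rf_markalldirty -- bump the mod counter and mark the component label
 * of every live component (and used spare) as dirty, so that an unclean
 * shutdown can be detected at the next configuration.
 */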
2537 void
2538 rf_markalldirty(RF_Raid_t *raidPtr)
2539 {
2540 RF_ComponentLabel_t *clabel;
2541 int sparecol;
2542 int c;
2543 int j;
2544 int scol = -1;
2545
2546 raidPtr->mod_counter++;
2547 for (c = 0; c < raidPtr->numCol; c++) {
2548 /* we don't want to touch (at all) a disk that has
2549 failed */
2550 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2551 clabel = raidget_component_label(raidPtr, c);
2552 if (clabel->status == rf_ds_spared) {
2553 /* XXX do something special...
2554 but whatever you do, don't
2555 try to access it!! */
2556 } else {
2557 raidmarkdirty(raidPtr, c);
2558 }
2559 }
2560 }
2561
2562 	for (c = 0; c < raidPtr->numSpare; c++) {
2563 sparecol = raidPtr->numCol + c;
2564 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2565 			/*
2566 			 * We claim this disk is "optimal" if it's
2567 			 * rf_ds_used_spare, as that means it should be
2568 			 * directly substitutable for the disk it replaced.
2569 			 * We note that too...
2570 			 */
2573
2574 			for (j = 0; j < raidPtr->numCol; j++) {
2575 if (raidPtr->Disks[j].spareCol == sparecol) {
2576 scol = j;
2577 break;
2578 }
2579 }
2580
2581 clabel = raidget_component_label(raidPtr, sparecol);
2582 /* make sure status is noted */
2583
2584 raid_init_component_label(raidPtr, clabel);
2585
2586 clabel->row = 0;
2587 clabel->column = scol;
2588 /* Note: we *don't* change status from rf_ds_used_spare
2589 to rf_ds_optimal */
2590 /* clabel.status = rf_ds_optimal; */
2591
2592 raidmarkdirty(raidPtr, sparecol);
2593 }
2594 }
2595 }
2596
2597
2598 void
2599 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2600 {
2601 RF_ComponentLabel_t *clabel;
2602 int sparecol;
2603 int c;
2604 int j;
2605 int scol;
2606 struct raid_softc *rs = raidPtr->softc;
2607
2608 scol = -1;
2609
2610 /* XXX should do extra checks to make sure things really are clean,
2611 rather than blindly setting the clean bit... */
2612
2613 raidPtr->mod_counter++;
2614
2615 for (c = 0; c < raidPtr->numCol; c++) {
2616 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2617 clabel = raidget_component_label(raidPtr, c);
2618 /* make sure status is noted */
2619 clabel->status = rf_ds_optimal;
2620
2621 /* note what unit we are configured as */
2622 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2623 clabel->last_unit = raidPtr->raidid;
2624
2625 raidflush_component_label(raidPtr, c);
2626 if (final == RF_FINAL_COMPONENT_UPDATE) {
2627 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2628 raidmarkclean(raidPtr, c);
2629 }
2630 }
2631 }
2632 /* else we don't touch it.. */
2633 }
2634
2635 	for (c = 0; c < raidPtr->numSpare; c++) {
2636 sparecol = raidPtr->numCol + c;
2637 /* Need to ensure that the reconstruct actually completed! */
2638 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2639 			/*
2640 			 * We claim this disk is "optimal" if it's
2641 			 * rf_ds_used_spare, as that means it should be
2642 			 * directly substitutable for the disk it replaced.
2643 			 * We note that too...
2644 			 */
2647
2648 			for (j = 0; j < raidPtr->numCol; j++) {
2649 if (raidPtr->Disks[j].spareCol == sparecol) {
2650 scol = j;
2651 break;
2652 }
2653 }
2654
2655 /* XXX shouldn't *really* need this... */
2656 clabel = raidget_component_label(raidPtr, sparecol);
2657 /* make sure status is noted */
2658
2659 raid_init_component_label(raidPtr, clabel);
2660
2661 clabel->column = scol;
2662 clabel->status = rf_ds_optimal;
2663 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2664 clabel->last_unit = raidPtr->raidid;
2665
2666 raidflush_component_label(raidPtr, sparecol);
2667 if (final == RF_FINAL_COMPONENT_UPDATE) {
2668 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2669 raidmarkclean(raidPtr, sparecol);
2670 }
2671 }
2672 }
2673 }
2674 }
2675
2676 void
2677 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2678 {
2679
2680 if (vp != NULL) {
2681 if (auto_configured == 1) {
2682 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2683 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2684 vput(vp);
2685
2686 } else {
2687 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2688 }
2689 }
2690 }
2691
2692
2693 void
2694 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2695 {
2696 int r,c;
2697 struct vnode *vp;
2698 int acd;
2699
2700
2701 /* We take this opportunity to close the vnodes like we should.. */
2702
2703 for (c = 0; c < raidPtr->numCol; c++) {
2704 vp = raidPtr->raid_cinfo[c].ci_vp;
2705 acd = raidPtr->Disks[c].auto_configured;
2706 rf_close_component(raidPtr, vp, acd);
2707 raidPtr->raid_cinfo[c].ci_vp = NULL;
2708 raidPtr->Disks[c].auto_configured = 0;
2709 }
2710
2711 for (r = 0; r < raidPtr->numSpare; r++) {
2712 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2713 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2714 rf_close_component(raidPtr, vp, acd);
2715 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2716 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2717 }
2718 }
2719
2720
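/*
 * rf_ReconThread -- kthread body: fail the requested component and,
 * if RF_FDFLAGS_RECON is set, reconstruct its contents to a spare.
 */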
2721 static void
2722 rf_ReconThread(struct rf_recon_req_internal *req)
2723 {
2724 int s;
2725 RF_Raid_t *raidPtr;
2726
2727 s = splbio();
2728 raidPtr = (RF_Raid_t *) req->raidPtr;
2729 raidPtr->recon_in_progress = 1;
2730
2731 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2732 raidPtr->forceRecon = 1;
2733 }
2734
2735 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2736 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2737
2738 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2739 raidPtr->forceRecon = 0;
2740 }
2741
2742 RF_Free(req, sizeof(*req));
2743
2744 raidPtr->recon_in_progress = 0;
2745 splx(s);
2746
2747 /* That's all... */
2748 kthread_exit(0); /* does not return */
2749 }
2750
2751 static void
2752 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2753 {
2754 int retcode;
2755 int s;
2756
2757 raidPtr->parity_rewrite_stripes_done = 0;
2758 raidPtr->parity_rewrite_in_progress = 1;
2759 s = splbio();
2760 retcode = rf_RewriteParity(raidPtr);
2761 splx(s);
2762 if (retcode) {
2763 printf("raid%d: Error re-writing parity (%d)!\n",
2764 raidPtr->raidid, retcode);
2765 } else {
2766 /* set the clean bit! If we shutdown correctly,
2767 the clean bit on each component label will get
2768 set */
2769 raidPtr->parity_good = RF_RAID_CLEAN;
2770 }
2771 raidPtr->parity_rewrite_in_progress = 0;
2772
2773 /* Anyone waiting for us to stop? If so, inform them... */
2774 if (raidPtr->waitShutdown) {
2775 rf_lock_mutex2(raidPtr->rad_lock);
2776 cv_broadcast(&raidPtr->parity_rewrite_cv);
2777 rf_unlock_mutex2(raidPtr->rad_lock);
2778 }
2779
2780 /* That's all... */
2781 kthread_exit(0); /* does not return */
2782 }
2783
2784
2785 static void
2786 rf_CopybackThread(RF_Raid_t *raidPtr)
2787 {
2788 int s;
2789
2790 raidPtr->copyback_in_progress = 1;
2791 s = splbio();
2792 rf_CopybackReconstructedData(raidPtr);
2793 splx(s);
2794 raidPtr->copyback_in_progress = 0;
2795
2796 /* That's all... */
2797 kthread_exit(0); /* does not return */
2798 }
2799
2800
2801 static void
2802 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2803 {
2804 int s;
2805 RF_Raid_t *raidPtr;
2806
2807 s = splbio();
2808 raidPtr = req->raidPtr;
2809 raidPtr->recon_in_progress = 1;
2810
2811 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2812 raidPtr->forceRecon = 1;
2813 }
2814
2815 rf_ReconstructInPlace(raidPtr, req->col);
2816
2817 if (req->flags & RF_FDFLAGS_RECON_FORCE) {
2818 raidPtr->forceRecon = 0;
2819 }
2820
2821 RF_Free(req, sizeof(*req));
2822 raidPtr->recon_in_progress = 0;
2823 splx(s);
2824
2825 /* That's all... */
2826 kthread_exit(0); /* does not return */
2827 }
2828
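/*
 * rf_get_component -- read and sanity-check the component label on the
 * given device.  If it looks reasonable, prepend an RF_AutoConfig_t
 * entry to ac_list; otherwise close and release the vnode.
 */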
2829 static RF_AutoConfig_t *
2830 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2831 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2832 unsigned secsize)
2833 {
2834 int good_one = 0;
2835 RF_ComponentLabel_t *clabel;
2836 RF_AutoConfig_t *ac;
2837
2838 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2839
2840 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2841 /* Got the label. Does it look reasonable? */
2842 if (rf_reasonable_label(clabel, numsecs) &&
2843 (rf_component_label_partitionsize(clabel) <= size)) {
2844 #ifdef DEBUG
2845 printf("Component on: %s: %llu\n",
2846 cname, (unsigned long long)size);
2847 rf_print_component_label(clabel);
2848 #endif
2849 /* if it's reasonable, add it, else ignore it. */
2850 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2851 M_WAITOK);
2852 strlcpy(ac->devname, cname, sizeof(ac->devname));
2853 ac->dev = dev;
2854 ac->vp = vp;
2855 ac->clabel = clabel;
2856 ac->next = ac_list;
2857 ac_list = ac;
2858 good_one = 1;
2859 }
2860 }
2861 if (!good_one) {
2862 /* cleanup */
2863 free(clabel, M_RAIDFRAME);
2864 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2865 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2866 vput(vp);
2867 }
2868 return ac_list;
2869 }
2870
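/*
 * rf_find_raid_components -- walk every disk-class device in the
 * system, collecting candidate components (RAIDframe wedges, FS_RAID
 * partitions, or raw disks) into an RF_AutoConfig_t list.
 */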
2871 static RF_AutoConfig_t *
2872 rf_find_raid_components(void)
2873 {
2874 struct vnode *vp;
2875 struct disklabel label;
2876 device_t dv;
2877 deviter_t di;
2878 dev_t dev;
2879 int bmajor, bminor, wedge, rf_part_found;
2880 int error;
2881 int i;
2882 RF_AutoConfig_t *ac_list;
2883 uint64_t numsecs;
2884 unsigned secsize;
2885 int dowedges;
2886
2887 /* initialize the AutoConfig list */
2888 ac_list = NULL;
2889
2890 /*
2891 	 * We begin by trolling through *all* the devices on the system,
2892 	 * *twice*: first we scan for wedges, then for other devices.  This
2893 	 * avoids using a raw partition instead of a wedge that covers the
2894 	 * whole disk.
2895
2896 	for (dowedges = 1; dowedges >= 0; --dowedges) {
2897 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2898 dv = deviter_next(&di)) {
2899
2900 /* we are only interested in disks */
2901 if (device_class(dv) != DV_DISK)
2902 continue;
2903
2904 /* we don't care about floppies */
2905 if (device_is_a(dv, "fd")) {
2906 continue;
2907 }
2908
2909 /* we don't care about CDs. */
2910 if (device_is_a(dv, "cd")) {
2911 continue;
2912 }
2913
2914 /* we don't care about md. */
2915 if (device_is_a(dv, "md")) {
2916 continue;
2917 }
2918
2919 /* hdfd is the Atari/Hades floppy driver */
2920 if (device_is_a(dv, "hdfd")) {
2921 continue;
2922 }
2923
2924 /* fdisa is the Atari/Milan floppy driver */
2925 if (device_is_a(dv, "fdisa")) {
2926 continue;
2927 }
2928
2929 /* we don't care about spiflash */
2930 if (device_is_a(dv, "spiflash")) {
2931 continue;
2932 }
2933
2934 /* are we in the wedges pass ? */
2935 wedge = device_is_a(dv, "dk");
2936 if (wedge != dowedges) {
2937 continue;
2938 }
2939
2940 /* need to find the device_name_to_block_device_major stuff */
2941 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2942
2943 			rf_part_found = 0; /* No raid partition as yet */
2944
2945 /* get a vnode for the raw partition of this disk */
2946 bminor = minor(device_unit(dv));
2947 dev = wedge ? makedev(bmajor, bminor) :
2948 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2949 if (bdevvp(dev, &vp))
2950 panic("RAID can't alloc vnode");
2951
2952 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2953 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2954
2955 if (error) {
2956 				/* "Who cares."  Continue looking
2957 				   for something that exists */
2958 vput(vp);
2959 continue;
2960 }
2961
2962 error = getdisksize(vp, &numsecs, &secsize);
2963 if (error) {
2964 /*
2965 * Pseudo devices like vnd and cgd can be
2966 * opened but may still need some configuration.
2967 * Ignore these quietly.
2968 */
2969 if (error != ENXIO)
2970 printf("RAIDframe: can't get disk size"
2971 " for dev %s (%d)\n",
2972 device_xname(dv), error);
2973 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2974 vput(vp);
2975 continue;
2976 }
2977 if (wedge) {
2978 struct dkwedge_info dkw;
2979 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2980 NOCRED);
2981 if (error) {
2982 printf("RAIDframe: can't get wedge info for "
2983 "dev %s (%d)\n", device_xname(dv), error);
2984 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2985 vput(vp);
2986 continue;
2987 }
2988
2989 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2990 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2991 vput(vp);
2992 continue;
2993 }
2994
2995 VOP_UNLOCK(vp);
2996 ac_list = rf_get_component(ac_list, dev, vp,
2997 device_xname(dv), dkw.dkw_size, numsecs, secsize);
2998 				rf_part_found = 1; /* There is a raid component on this disk */
2999 continue;
3000 }
3001
3002 /* Ok, the disk exists. Go get the disklabel. */
3003 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3004 if (error) {
3005 /*
3006 * XXX can't happen - open() would
3007 * have errored out (or faked up one)
3008 */
3009 if (error != ENOTTY)
3010 printf("RAIDframe: can't get label for dev "
3011 "%s (%d)\n", device_xname(dv), error);
3012 }
3013
3014 /* don't need this any more. We'll allocate it again
3015 a little later if we really do... */
3016 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3017 vput(vp);
3018
3019 if (error)
3020 continue;
3021
3022 			rf_part_found = 0; /* No raid partitions yet */
3023 for (i = 0; i < label.d_npartitions; i++) {
3024 char cname[sizeof(ac_list->devname)];
3025
3026 /* We only support partitions marked as RAID */
3027 if (label.d_partitions[i].p_fstype != FS_RAID)
3028 continue;
3029
3030 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3031 if (bdevvp(dev, &vp))
3032 panic("RAID can't alloc vnode");
3033
3034 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3035 error = VOP_OPEN(vp, FREAD, NOCRED);
3036 if (error) {
3037 /* Not quite a 'whatever'. In
3038 * this situation we know
3039 * there is a FS_RAID
3040 * partition, but we can't
3041 * open it. The most likely
3042 * reason is that the
3043 * partition is already in
3044 * use by another RAID set.
3045 * So note that we've already
3046 * found a partition on this
3047 * disk so we don't attempt
3048 * to use the raw disk later. */
3049 rf_part_found = 1;
3050 vput(vp);
3051 continue;
3052 }
3053 VOP_UNLOCK(vp);
3054 snprintf(cname, sizeof(cname), "%s%c",
3055 device_xname(dv), 'a' + i);
3056 ac_list = rf_get_component(ac_list, dev, vp, cname,
3057 label.d_partitions[i].p_size, numsecs, secsize);
3058 				rf_part_found = 1; /* There is at least one raid partition on this disk */
3059 }
3060
3061 /*
3062 		 * If there is no raid component on this disk, either in a
3063 		 * disklabel or inside a wedge, check the raw partition as well,
3064 		 * as it is possible to configure raid components on raw disk
3065 		 * devices.
3066 */
3067
3068 if (!rf_part_found) {
3069 char cname[sizeof(ac_list->devname)];
3070
3071 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3072 if (bdevvp(dev, &vp))
3073 panic("RAID can't alloc vnode");
3074
3075 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3076
3077 error = VOP_OPEN(vp, FREAD, NOCRED);
3078 if (error) {
3079 /* Whatever... */
3080 vput(vp);
3081 continue;
3082 }
3083 VOP_UNLOCK(vp);
3084 snprintf(cname, sizeof(cname), "%s%c",
3085 device_xname(dv), 'a' + RAW_PART);
3086 ac_list = rf_get_component(ac_list, dev, vp, cname,
3087 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3088 }
3089 }
3090 deviter_release(&di);
3091 }
3092 return ac_list;
3093 }
3094
3095 int
3096 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3097 {
3098
3099 	if ((clabel->version == RF_COMPONENT_LABEL_VERSION_1 ||
3100 	     clabel->version == RF_COMPONENT_LABEL_VERSION ||
3101 	     clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
3102 	    (clabel->clean == RF_RAID_CLEAN ||
3103 	     clabel->clean == RF_RAID_DIRTY) &&
3104 	    clabel->row >= 0 &&
3105 clabel->column >= 0 &&
3106 clabel->num_rows > 0 &&
3107 clabel->num_columns > 0 &&
3108 clabel->row < clabel->num_rows &&
3109 clabel->column < clabel->num_columns &&
3110 clabel->blockSize > 0 &&
3111 /*
3112 * numBlocksHi may contain garbage, but it is ok since
3113 * the type is unsigned. If it is really garbage,
3114 * rf_fix_old_label_size() will fix it.
3115 */
3116 rf_component_label_numblocks(clabel) > 0) {
3117 /*
3118 * label looks reasonable enough...
3119 * let's make sure it has no old garbage.
3120 */
3121 if (numsecs)
3122 rf_fix_old_label_size(clabel, numsecs);
3123 return(1);
3124 }
3125 return(0);
3126 }
3127
3128
3129 /*
3130 * For reasons yet unknown, some old component labels have garbage in
3131 * the newer numBlocksHi region, and this causes lossage. Since those
3132 * disks will also have numsecs set to less than 32 bits of sectors,
3133 * we can determine when this corruption has occurred, and fix it.
3134 *
3135 * The exact same problem, with the same unknown reason, happens to
3136 * the partitionSizeHi member as well.
3137 */
3138 static void
3139 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3140 {
3141
3142 if (numsecs < ((uint64_t)1 << 32)) {
3143 if (clabel->numBlocksHi) {
3144 printf("WARNING: total sectors < 32 bits, yet "
3145 "numBlocksHi set\n"
3146 "WARNING: resetting numBlocksHi to zero.\n");
3147 clabel->numBlocksHi = 0;
3148 }
3149
3150 if (clabel->partitionSizeHi) {
3151 printf("WARNING: total sectors < 32 bits, yet "
3152 "partitionSizeHi set\n"
3153 "WARNING: resetting partitionSizeHi to zero.\n");
3154 clabel->partitionSizeHi = 0;
3155 }
3156 }
3157 }
3158
3159
3160 #ifdef DEBUG
3161 void
3162 rf_print_component_label(RF_ComponentLabel_t *clabel)
3163 {
3164 uint64_t numBlocks;
3165 static const char *rp[] = {
3166 "No", "Force", "Soft", "*invalid*"
3167 };
3168
3169
3170 numBlocks = rf_component_label_numblocks(clabel);
3171
3172 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3173 clabel->row, clabel->column,
3174 clabel->num_rows, clabel->num_columns);
3175 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3176 clabel->version, clabel->serial_number,
3177 clabel->mod_counter);
3178 printf(" Clean: %s Status: %d\n",
3179 clabel->clean ? "Yes" : "No", clabel->status);
3180 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3181 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3182 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3183 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3184 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3185 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3186 printf(" Last configured as: raid%d\n", clabel->last_unit);
3187 #if 0
3188 printf(" Config order: %d\n", clabel->config_order);
3189 #endif
3190
3191 }
3192 #endif
3193
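/*
 * rf_create_auto_sets -- partition the flat list of discovered
 * components into configuration sets, grouping components whose labels
 * are mutually consistent (see rf_does_it_fit()).
 */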
3194 static RF_ConfigSet_t *
3195 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3196 {
3197 RF_AutoConfig_t *ac;
3198 RF_ConfigSet_t *config_sets;
3199 RF_ConfigSet_t *cset;
3200 RF_AutoConfig_t *ac_next;
3201
3202
3203 config_sets = NULL;
3204
3205 /* Go through the AutoConfig list, and figure out which components
3206 belong to what sets. */
3207 ac = ac_list;
3208 	while (ac != NULL) {
3209 /* we're going to putz with ac->next, so save it here
3210 for use at the end of the loop */
3211 ac_next = ac->next;
3212
3213 if (config_sets == NULL) {
3214 /* will need at least this one... */
3215 config_sets = malloc(sizeof(RF_ConfigSet_t),
3216 M_RAIDFRAME, M_WAITOK);
3217 /* this one is easy :) */
3218 config_sets->ac = ac;
3219 config_sets->next = NULL;
3220 config_sets->rootable = 0;
3221 ac->next = NULL;
3222 } else {
3223 /* which set does this component fit into? */
3224 cset = config_sets;
3225 			while (cset != NULL) {
3226 if (rf_does_it_fit(cset, ac)) {
3227 /* looks like it matches... */
3228 ac->next = cset->ac;
3229 cset->ac = ac;
3230 break;
3231 }
3232 cset = cset->next;
3233 }
3234 			if (cset == NULL) {
3235 /* didn't find a match above... new set..*/
3236 cset = malloc(sizeof(RF_ConfigSet_t),
3237 M_RAIDFRAME, M_WAITOK);
3238 cset->ac = ac;
3239 ac->next = NULL;
3240 cset->next = config_sets;
3241 cset->rootable = 0;
3242 config_sets = cset;
3243 }
3244 }
3245 ac = ac_next;
3246 }
3247
3248
3249 return(config_sets);
3250 }
3251
3252 static int
3253 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3254 {
3255 RF_ComponentLabel_t *clabel1, *clabel2;
3256
3257 /* If this one matches the *first* one in the set, that's good
3258 enough, since the other members of the set would have been
3259 through here too... */
3260 /* note that we are not checking partitionSize here..
3261
3262 Note that we are also not checking the mod_counters here.
3263 If everything else matches except the mod_counter, that's
3264 good enough for this test. We will deal with the mod_counters
3265 a little later in the autoconfiguration process.
3266
3267 (clabel1->mod_counter == clabel2->mod_counter) &&
3268
3269 The reason we don't check for this is that failed disks
3270 will have lower modification counts. If those disks are
3271 not added to the set they used to belong to, then they will
3272 form their own set, which may result in 2 different sets,
3273 for example, competing to be configured at raid0, and
3274 perhaps competing to be the root filesystem set. If the
3275 wrong ones get configured, or both attempt to become /,
3276 	   weird behaviour and/or serious lossage will occur.  Thus we
3277 need to bring them into the fold here, and kick them out at
3278 a later point.
3279
3280 */
3281
3282 clabel1 = cset->ac->clabel;
3283 clabel2 = ac->clabel;
3284 if ((clabel1->version == clabel2->version) &&
3285 (clabel1->serial_number == clabel2->serial_number) &&
3286 (clabel1->num_rows == clabel2->num_rows) &&
3287 (clabel1->num_columns == clabel2->num_columns) &&
3288 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3289 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3290 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3291 (clabel1->parityConfig == clabel2->parityConfig) &&
3292 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3293 (clabel1->blockSize == clabel2->blockSize) &&
3294 rf_component_label_numblocks(clabel1) ==
3295 rf_component_label_numblocks(clabel2) &&
3296 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3297 (clabel1->root_partition == clabel2->root_partition) &&
3298 (clabel1->last_unit == clabel2->last_unit) &&
3299 (clabel1->config_order == clabel2->config_order)) {
3300 		/* if it gets here, it almost *has* to be a match */
3301 } else {
3302 /* it's not consistent with somebody in the set..
3303 punt */
3304 return(0);
3305 }
3306 /* all was fine.. it must fit... */
3307 return(1);
3308 }
3309
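/*
 * rf_have_enough_components -- decide whether enough members of this
 * set (at the newest mod_counter seen) survive for the set to be
 * configured.
 */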
3310 static int
3311 rf_have_enough_components(RF_ConfigSet_t *cset)
3312 {
3313 RF_AutoConfig_t *ac;
3314 RF_AutoConfig_t *auto_config;
3315 RF_ComponentLabel_t *clabel;
3316 int c;
3317 int num_cols;
3318 int num_missing;
3319 int mod_counter;
3320 int mod_counter_found;
3321 int even_pair_failed;
3322 char parity_type;
3323
3324
3325 /* check to see that we have enough 'live' components
3326 of this set. If so, we can configure it if necessary */
3327
3328 num_cols = cset->ac->clabel->num_columns;
3329 parity_type = cset->ac->clabel->parityConfig;
3330
3331 /* XXX Check for duplicate components!?!?!? */
3332
3333 /* Determine what the mod_counter is supposed to be for this set. */
3334
3335 mod_counter_found = 0;
3336 mod_counter = 0;
3337 ac = cset->ac;
3338 	while (ac != NULL) {
3339 		if (mod_counter_found == 0) {
3340 mod_counter = ac->clabel->mod_counter;
3341 mod_counter_found = 1;
3342 } else {
3343 if (ac->clabel->mod_counter > mod_counter) {
3344 mod_counter = ac->clabel->mod_counter;
3345 }
3346 }
3347 ac = ac->next;
3348 }
3349
3350 num_missing = 0;
3351 auto_config = cset->ac;
3352
3353 even_pair_failed = 0;
3354 	for (c = 0; c < num_cols; c++) {
3355 		ac = auto_config;
3356 		while (ac != NULL) {
3357 			if ((ac->clabel->column == c) &&
3358 			    (ac->clabel->mod_counter == mod_counter)) {
3359 				/* it's this one... */
3360 #ifdef DEBUG
3361 				printf("Found: %s at %d\n",
3362 				       ac->devname, c);
3363 #endif
3364 				break;
3365 			}
3366 			ac = ac->next;
3367 		}
3368 		if (ac == NULL) {
3369 /* Didn't find one here! */
3370 /* special case for RAID 1, especially
3371 where there are more than 2
3372 components (where RAIDframe treats
3373 things a little differently :( ) */
3374 if (parity_type == '1') {
3375 				if (c % 2 == 0) { /* even component */
3376 even_pair_failed = 1;
3377 } else { /* odd component. If
3378 we're failed, and
3379 so is the even
3380 component, it's
3381 "Good Night, Charlie" */
3382 if (even_pair_failed == 1) {
3383 return(0);
3384 }
3385 }
3386 } else {
3387 /* normal accounting */
3388 num_missing++;
3389 }
3390 }
3391 		if ((parity_type == '1') && (c % 2 == 1)) {
3392 /* Just did an even component, and we didn't
3393 bail.. reset the even_pair_failed flag,
3394 and go on to the next component.... */
3395 even_pair_failed = 0;
3396 }
3397 }
3398
3399 clabel = cset->ac->clabel;
3400
3401 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3402 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3403 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3404 /* XXX this needs to be made *much* more general */
3405 /* Too many failures */
3406 return(0);
3407 }
3408 /* otherwise, all is well, and we've got enough to take a kick
3409 at autoconfiguring this set */
3410 return(1);
3411 }
3412
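/*
 * rf_create_configuration -- build an RF_Config_t for this set from the
 * first component label, filling in the geometry and queue parameters.
 */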
3413 static void
3414 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3415 RF_Raid_t *raidPtr)
3416 {
3417 RF_ComponentLabel_t *clabel;
3418 int i;
3419
3420 clabel = ac->clabel;
3421
3422 /* 1. Fill in the common stuff */
3423 config->numCol = clabel->num_columns;
3424 config->numSpare = 0; /* XXX should this be set here? */
3425 config->sectPerSU = clabel->sectPerSU;
3426 config->SUsPerPU = clabel->SUsPerPU;
3427 config->SUsPerRU = clabel->SUsPerRU;
3428 config->parityConfig = clabel->parityConfig;
3429 /* XXX... */
3430 	strcpy(config->diskQueueType, "fifo");
3431 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3432 config->layoutSpecificSize = 0; /* XXX ?? */
3433
3434 	while (ac != NULL) {
3435 /* row/col values will be in range due to the checks
3436 in reasonable_label() */
3437 strcpy(config->devnames[0][ac->clabel->column],
3438 ac->devname);
3439 ac = ac->next;
3440 }
3441
3442 	for (i = 0; i < RF_MAXDBGV; i++) {
3443 config->debugVars[i][0] = 0;
3444 }
3445 }
3446
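/*
 * rf_set_autoconfig -- set the autoconfigure flag in the in-core state
 * and push it out to the component label of every optimal component
 * and used spare.
 */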
3447 static int
3448 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3449 {
3450 RF_ComponentLabel_t *clabel;
3451 int column;
3452 int sparecol;
3453
3454 raidPtr->autoconfigure = new_value;
3455
3456 	for (column = 0; column < raidPtr->numCol; column++) {
3457 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3458 clabel = raidget_component_label(raidPtr, column);
3459 clabel->autoconfigure = new_value;
3460 raidflush_component_label(raidPtr, column);
3461 }
3462 }
3463 	for (column = 0; column < raidPtr->numSpare; column++) {
3464 sparecol = raidPtr->numCol + column;
3465 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3466 clabel = raidget_component_label(raidPtr, sparecol);
3467 clabel->autoconfigure = new_value;
3468 raidflush_component_label(raidPtr, sparecol);
3469 }
3470 }
3471 return(new_value);
3472 }
3473
3474 static int
3475 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3476 {
3477 RF_ComponentLabel_t *clabel;
3478 int column;
3479 int sparecol;
3480
3481 raidPtr->root_partition = new_value;
3482 	for (column = 0; column < raidPtr->numCol; column++) {
3483 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3484 clabel = raidget_component_label(raidPtr, column);
3485 clabel->root_partition = new_value;
3486 raidflush_component_label(raidPtr, column);
3487 }
3488 }
3489 	for (column = 0; column < raidPtr->numSpare; column++) {
3490 sparecol = raidPtr->numCol + column;
3491 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3492 clabel = raidget_component_label(raidPtr, sparecol);
3493 clabel->root_partition = new_value;
3494 raidflush_component_label(raidPtr, sparecol);
3495 }
3496 }
3497 return(new_value);
3498 }
3499
3500 static void
3501 rf_release_all_vps(RF_ConfigSet_t *cset)
3502 {
3503 RF_AutoConfig_t *ac;
3504
3505 ac = cset->ac;
3506 	while (ac != NULL) {
3507 /* Close the vp, and give it back */
3508 if (ac->vp) {
3509 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3510 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3511 vput(ac->vp);
3512 ac->vp = NULL;
3513 }
3514 ac = ac->next;
3515 }
3516 }
3517
3518
3519 static void
3520 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3521 {
3522 RF_AutoConfig_t *ac;
3523 RF_AutoConfig_t *next_ac;
3524
3525 ac = cset->ac;
3526 	while (ac != NULL) {
3527 next_ac = ac->next;
3528 /* nuke the label */
3529 free(ac->clabel, M_RAIDFRAME);
3530 /* cleanup the config structure */
3531 free(ac, M_RAIDFRAME);
3532 /* "next.." */
3533 ac = next_ac;
3534 }
3535 /* and, finally, nuke the config set */
3536 free(cset, M_RAIDFRAME);
3537 }
3538
3539
3540 void
3541 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3542 {
3543 /* avoid over-writing byteswapped version. */
3544 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
3545 clabel->version = RF_COMPONENT_LABEL_VERSION;
3546 clabel->serial_number = raidPtr->serial_number;
3547 clabel->mod_counter = raidPtr->mod_counter;
3548
3549 clabel->num_rows = 1;
3550 clabel->num_columns = raidPtr->numCol;
3551 clabel->clean = RF_RAID_DIRTY; /* not clean */
3552 clabel->status = rf_ds_optimal; /* "It's good!" */
3553
3554 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3555 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3556 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3557
3558 clabel->blockSize = raidPtr->bytesPerSector;
3559 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3560
3561 /* XXX not portable */
3562 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3563 clabel->maxOutstanding = raidPtr->maxOutstanding;
3564 clabel->autoconfigure = raidPtr->autoconfigure;
3565 clabel->root_partition = raidPtr->root_partition;
3566 clabel->last_unit = raidPtr->raidid;
3567 clabel->config_order = raidPtr->config_order;
3568
3569 #ifndef RF_NO_PARITY_MAP
3570 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3571 #endif
3572 }
3573
3574 static struct raid_softc *
3575 rf_auto_config_set(RF_ConfigSet_t *cset)
3576 {
3577 RF_Raid_t *raidPtr;
3578 RF_Config_t *config;
3579 int raidID;
3580 struct raid_softc *sc;
3581
3582 #ifdef DEBUG
3583 printf("RAID autoconfigure\n");
3584 #endif
3585
3586 /* 1. Create a config structure */
3587 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3588
3589 /*
3590 2. Figure out what RAID ID this one is supposed to live at
3591 See if we can get the same RAID dev that it was configured
3592 on last time..
3593 */
3594
3595 raidID = cset->ac->clabel->last_unit;
3596 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3597 sc = raidget(++raidID, false))
3598 continue;
3599 #ifdef DEBUG
3600 printf("Configuring raid%d:\n",raidID);
3601 #endif
3602
3603 if (sc == NULL)
3604 sc = raidget(raidID, true);
3605 raidPtr = &sc->sc_r;
3606
3607 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3608 raidPtr->softc = sc;
3609 raidPtr->raidid = raidID;
3610 raidPtr->openings = RAIDOUTSTANDING;
3611
3612 /* 3. Build the configuration structure */
3613 rf_create_configuration(cset->ac, config, raidPtr);
3614
3615 /* 4. Do the configuration */
3616 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3617 raidinit(sc);
3618
3619 rf_markalldirty(raidPtr);
3620 raidPtr->autoconfigure = 1; /* XXX do this here? */
3621 switch (cset->ac->clabel->root_partition) {
3622 case 1: /* Force Root */
3623 case 2: /* Soft Root: root when boot partition part of raid */
3624 /*
3625 * everything configured just fine. Make a note
3626 * that this set is eligible to be root,
3627 * or forced to be root
3628 */
3629 cset->rootable = cset->ac->clabel->root_partition;
3630 /* XXX do this here? */
3631 raidPtr->root_partition = cset->rootable;
3632 break;
3633 default:
3634 break;
3635 }
3636 } else {
3637 raidput(sc);
3638 sc = NULL;
3639 }
3640
3641 /* 5. Cleanup */
3642 free(config, M_RAIDFRAME);
3643 return sc;
3644 }
3645
3646 void
3647 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3648 size_t xmin, size_t xmax)
3649 {
3650
3651 /* Format: raid%d_foo */
3652 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3653
3654 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3655 pool_sethiwat(p, xmax);
3656 pool_prime(p, xmin);
3657 }
3658
3659
3660 /*
3661  * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
3662 * to see if there is IO pending and if that IO could possibly be done
3663 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3664 * otherwise.
3665 *
3666 */
3667 int
3668 rf_buf_queue_check(RF_Raid_t *raidPtr)
3669 {
3670 struct raid_softc *rs;
3671 struct dk_softc *dksc;
3672
3673 rs = raidPtr->softc;
3674 dksc = &rs->sc_dksc;
3675
3676 if ((rs->sc_flags & RAIDF_INITED) == 0)
3677 return 1;
3678
3679 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3680 /* there is work to do */
3681 return 0;
3682 }
3683 /* default is nothing to do */
3684 return 1;
3685 }
3686
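/*
 * rf_getdisksize -- fetch the size of the underlying device, reserving
 * rf_protectedSectors at the front for component metadata.
 */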
3687 int
3688 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3689 {
3690 uint64_t numsecs;
3691 unsigned secsize;
3692 int error;
3693
3694 error = getdisksize(vp, &numsecs, &secsize);
3695 if (error == 0) {
3696 diskPtr->blockSize = secsize;
3697 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3698 diskPtr->partitionSize = numsecs;
3699 return 0;
3700 }
3701 return error;
3702 }
3703
3704 static int
3705 raid_match(device_t self, cfdata_t cfdata, void *aux)
3706 {
3707 return 1;
3708 }
3709
3710 static void
3711 raid_attach(device_t parent, device_t self, void *aux)
3712 {
3713 }
3714
3715
3716 static int
3717 raid_detach(device_t self, int flags)
3718 {
3719 int error;
3720 struct raid_softc *rs = raidsoftc(self);
3721
3722 if (rs == NULL)
3723 return ENXIO;
3724
3725 if ((error = raidlock(rs)) != 0)
3726 return error;
3727
3728 error = raid_detach_unlocked(rs);
3729
3730 raidunlock(rs);
3731
3732 /* XXX raid can be referenced here */
3733
3734 if (error)
3735 return error;
3736
3737 /* Free the softc */
3738 raidput(rs);
3739
3740 return 0;
3741 }
3742
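/*
 * rf_set_geometry -- advertise a synthetic geometry, derived from the
 * stripe layout, to the disk(9) subsystem.
 */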
3743 static void
3744 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3745 {
3746 struct dk_softc *dksc = &rs->sc_dksc;
3747 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3748
3749 memset(dg, 0, sizeof(*dg));
3750
3751 dg->dg_secperunit = raidPtr->totalSectors;
3752 dg->dg_secsize = raidPtr->bytesPerSector;
3753 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3754 dg->dg_ntracks = 4 * raidPtr->numCol;
3755
3756 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3757 }
3758
3759 /*
3760 * Get cache info for all the components (including spares).
3761 * Returns intersection of all the cache flags of all disks, or first
3762 * error if any encountered.
3763 * XXXfua feature flags can change as spares are added - lock down somehow
3764 */
3765 static int
3766 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3767 {
3768 int c;
3769 int error;
3770 int dkwhole = 0, dkpart;
3771
3772 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3773 /*
3774 * Check any non-dead disk, even when currently being
3775 * reconstructed.
3776 */
3777 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3778 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3779 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3780 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3781 if (error) {
3782 if (error != ENODEV) {
3783 printf("raid%d: get cache for component %s failed\n",
3784 raidPtr->raidid,
3785 raidPtr->Disks[c].devname);
3786 }
3787
3788 return error;
3789 }
3790
3791 if (c == 0)
3792 dkwhole = dkpart;
3793 else
3794 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3795 }
3796 }
3797
3798 *data = dkwhole;
3799
3800 return 0;
3801 }
3802
3803 /*
3804 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3805 * We end up returning whatever error was returned by the first cache flush
3806 * that fails.
3807 */
3808
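/*
 * Flush the write cache of a single component, retrying up to five
 * times on failure; ENODEV (no cache to flush) ends the retries
 * immediately.
 */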
3809 static int
3810 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3811 {
3812 int e = 0;
3813 for (int i = 0; i < 5; i++) {
3814 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3815 &force, FWRITE, NOCRED);
3816 if (!e || e == ENODEV)
3817 return e;
3818 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3819 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3820 }
3821 return e;
3822 }
3823
3824 int
3825 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3826 {
3827 int c, error;
3828
3829 error = 0;
3830 for (c = 0; c < raidPtr->numCol; c++) {
3831 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3832 int e = rf_sync_component_cache(raidPtr, c, force);
3833 if (e && !error)
3834 error = e;
3835 }
3836 }
3837
3838 	for (c = 0; c < raidPtr->numSpare; c++) {
3839 int sparecol = raidPtr->numCol + c;
3840 /* Need to ensure that the reconstruct actually completed! */
3841 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3842 int e = rf_sync_component_cache(raidPtr, sparecol,
3843 force);
3844 if (e && !error)
3845 error = e;
3846 }
3847 }
3848 return error;
3849 }
3850
3851 /* Fill in info with the current status */
3852 void
3853 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3854 {
3855
3856 memset(info, 0, sizeof(*info));
3857
3858 if (raidPtr->status != rf_rs_reconstructing) {
3859 info->total = 100;
3860 info->completed = 100;
3861 } else {
3862 info->total = raidPtr->reconControl->numRUsTotal;
3863 info->completed = raidPtr->reconControl->numRUsComplete;
3864 }
3865 info->remaining = info->total - info->completed;
3866 }
3867
3868 /* Fill in info with the current status */
3869 void
3870 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3871 {
3872
3873 memset(info, 0, sizeof(*info));
3874
3875 if (raidPtr->parity_rewrite_in_progress == 1) {
3876 info->total = raidPtr->Layout.numStripe;
3877 info->completed = raidPtr->parity_rewrite_stripes_done;
3878 } else {
3879 info->completed = 100;
3880 info->total = 100;
3881 }
3882 info->remaining = info->total - info->completed;
3883 }
3884
3885 /* Fill in info with the current status */
3886 void
3887 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3888 {
3889
3890 memset(info, 0, sizeof(*info));
3891
3892 if (raidPtr->copyback_in_progress == 1) {
3893 info->total = raidPtr->Layout.numStripe;
3894 info->completed = raidPtr->copyback_stripes_done;
3895 info->remaining = info->total - info->completed;
3896 } else {
3897 info->remaining = 0;
3898 info->completed = 100;
3899 info->total = 100;
3900 }
3901 }
3902
3903 /* Fill in config with the current info */
3904 int
3905 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3906 {
3907 int d, i, j;
3908
3909 if (!raidPtr->valid)
3910 return ENODEV;
3911 config->cols = raidPtr->numCol;
3912 config->ndevs = raidPtr->numCol;
3913 if (config->ndevs >= RF_MAX_DISKS)
3914 return ENOMEM;
3915 config->nspares = raidPtr->numSpare;
3916 if (config->nspares >= RF_MAX_DISKS)
3917 return ENOMEM;
3918 config->maxqdepth = raidPtr->maxQueueDepth;
3919 d = 0;
3920 for (j = 0; j < config->cols; j++) {
3921 config->devs[d] = raidPtr->Disks[j];
3922 d++;
3923 }
3924 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3925 config->spares[i] = raidPtr->Disks[j];
3926 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3927 /* XXX: raidctl(8) expects to see this as a used spare */
3928 config->spares[i].status = rf_ds_used_spare;
3929 }
3930 }
3931 return 0;
3932 }
3933
int
rf_get_component_label(RF_Raid_t *raidPtr, void *data)
{
	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
	RF_ComponentLabel_t *raid_clabel;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
		return EINVAL;
	raid_clabel = raidget_component_label(raidPtr, column);
	memcpy(clabel, raid_clabel, sizeof *clabel);
	/*
	 * Fix-up for userland: a label written on an opposite-endian
	 * machine carries a byte-swapped version field, so hand back
	 * the native value.
	 */
	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;

	return 0;
}

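/*
 * A standalone sketch of the endianness fix-up performed just above,
 * with the constant and swap helper redefined locally purely for
 * illustration (EXAMPLE_LABEL_VERSION is a made-up stand-in, not the
 * real RF_COMPONENT_LABEL_VERSION): a version field matching the
 * byte-swapped constant is normalized to the native representation.
 */
#if 0
#include <stdint.h>

#define EXAMPLE_LABEL_VERSION	2	/* hypothetical stand-in */

static uint32_t
example_bswap32(uint32_t x)
{
	return ((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) |
	    ((x & 0x00ff0000U) >> 8) | ((x & 0xff000000U) >> 24);
}

static uint32_t
example_fixup_version(uint32_t version)
{
	/* Label written on an opposite-endian machine: normalize it. */
	if (version == example_bswap32(EXAMPLE_LABEL_VERSION))
		return EXAMPLE_LABEL_VERSION;
	return version;
}
#endif
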
/*
 * Module interface
 */

MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);

static int
raid_modcmd(modcmd_t cmd, void *data)
{
	int error;

	error = 0;
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = raid_modcmd_init();
		break;
	case MODULE_CMD_FINI:
		error = raid_modcmd_fini();
		break;
	default:
		error = ENOTTY;
		break;
	}
	return error;
}

static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}

static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n", __func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n", __func__);
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n", __func__);
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}

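/*
 * Usage note (illustration, not driver code): when built as a module,
 * the entry points above are driven by the standard module commands,
 * e.g. "modload raid" reaches raid_modcmd(MODULE_CMD_INIT, ...) and
 * "modunload raid" reaches raid_modcmd(MODULE_CMD_FINI, ...); the
 * LIST_EMPTY(&raids) check in raid_modcmd_fini() is what makes an
 * unload fail with EBUSY while any RAID set is still configured.
 */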