rf_netbsdkintf.c revision 1.356.2.10 1 /* $NetBSD: rf_netbsdkintf.c,v 1.356.2.10 2019/01/18 08:50:42 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.356.2.10 2019/01/18 08:50:42 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131 #include <sys/compat_stub.h>
132
133 #include <prop/proplib.h>
134
135 #include <dev/raidframe/raidframevar.h>
136 #include <dev/raidframe/raidframeio.h>
137 #include <dev/raidframe/rf_paritymap.h>
138
139 #include "rf_raid.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_desc.h"
144 #include "rf_diskqueue.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_threadstuff.h"
152
153 #include "rf_compat50.h"
154
155 #include "rf_compat80.h"
156
157 #ifdef COMPAT_NETBSD32
158 #include "rf_compat32.h"
159 #endif
160
161 #include "ioconf.h"
162
163 #ifdef DEBUG
164 int rf_kdebug_level = 0;
165 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
166 #else /* DEBUG */
167 #define db1_printf(a) { }
168 #endif /* DEBUG */
169
170 #ifdef DEBUG_ROOT
171 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
172 #else
173 #define DPRINTF(a, ...)
174 #endif
175
176 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
177 static rf_declare_mutex2(rf_sparet_wait_mutex);
178 static rf_declare_cond2(rf_sparet_wait_cv);
179 static rf_declare_cond2(rf_sparet_resp_cv);
180
181 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
182 * spare table */
183 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
184 * installation process */
185 #endif
186
187 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
188
189 /* prototypes */
190 static void KernelWakeupFunc(struct buf *);
191 static void InitBP(struct buf *, struct vnode *, unsigned,
192 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
193 void *, int, struct proc *);
194 struct raid_softc;
195 static void raidinit(struct raid_softc *);
196 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
197 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
198
199 static int raid_match(device_t, cfdata_t, void *);
200 static void raid_attach(device_t, device_t, void *);
201 static int raid_detach(device_t, int);
202
203 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
204 daddr_t, daddr_t);
205 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
206 daddr_t, daddr_t, int);
207
208 static int raidwrite_component_label(unsigned,
209 dev_t, struct vnode *, RF_ComponentLabel_t *);
210 static int raidread_component_label(unsigned,
211 dev_t, struct vnode *, RF_ComponentLabel_t *);
212
213 static int raid_diskstart(device_t, struct buf *bp);
214 static int raid_dumpblocks(device_t, void *, daddr_t, int);
215 static int raid_lastclose(device_t);
216
217 static dev_type_open(raidopen);
218 static dev_type_close(raidclose);
219 static dev_type_read(raidread);
220 static dev_type_write(raidwrite);
221 static dev_type_ioctl(raidioctl);
222 static dev_type_strategy(raidstrategy);
223 static dev_type_dump(raiddump);
224 static dev_type_size(raidsize);
225
/*
 * Block device switch for /dev/raidN: entry points dispatched by the
 * kernel for block-device access.  D_DISK marks this as a disk-style
 * device.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
236
/*
 * Character device switch for /dev/rraidN: raw (character) access
 * entry points.  Unsupported operations use the no-op stubs.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
251
/*
 * dk(9) driver glue: callbacks used by the common disk framework
 * (dk_softc) to open/close the unit, start I/O (raid_diskstart),
 * dump (raid_dumpblocks) and run last-close processing.
 */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
261
/*
 * Per-unit software state for a raid(4) device.  sc_dksc embeds the
 * common disk framework state; sc_r holds the RAIDframe engine state
 * for the set.
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk framework state */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags (RAIDF_* below) */
	int     sc_cflags;		/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* RAIDframe per-set state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global 'raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN	0x02	/* unit is being shutdown */
#define RAIDF_DETACH	0x04	/* detach after final close */
#define RAIDF_WANTED	0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED	0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */

#define raidunit(x)	DISKUNIT(x)
/*
 * Softc lookup from the autoconf device: device_private() yields the
 * raid_softc, whose sc_r.softc points back at the softc itself.
 */
#define raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
284
285 extern struct cfdriver raid_cd;
286 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
287 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
288 DVF_DETACH_SHUTDOWN);
289
/*
 * Internal representation of a rf_recon_req (a userland request to
 * fail or reconstruct a component).
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request targets */
	RF_ReconReqFlags_t flags;	/* request flags */
	void *raidPtr;			/* the associated RF_Raid_t (as void *) */
};
296
297 /*
298 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
299 * Be aware that large numbers can allow the driver to consume a lot of
300 * kernel memory, especially on writes, and in degraded mode reads.
301 *
302 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
303 * a single 64K write will typically require 64K for the old data,
304 * 64K for the old parity, and 64K for the new parity, for a total
305 * of 192K (if the parity buffer is not re-used immediately).
306 * Even it if is used immediately, that's still 128K, which when multiplied
307 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
308 *
309 * Now in degraded mode, for example, a 64K read on the above setup may
310 * require data reconstruction, which will require *all* of the 4 remaining
311 * disks to participate -- 4 * 32K/disk == 128K again.
312 */
313
314 #ifndef RAIDOUTSTANDING
315 #define RAIDOUTSTANDING 6
316 #endif
317
318 #define RAIDLABELDEV(dev) \
319 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
320
321 /* declared here, and made public, for the benefit of KVM stuff.. */
322
323 static int raidlock(struct raid_softc *);
324 static void raidunlock(struct raid_softc *);
325
326 static int raid_detach_unlocked(struct raid_softc *);
327
328 static void rf_markalldirty(RF_Raid_t *);
329 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
330
331 void rf_ReconThread(struct rf_recon_req_internal *);
332 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
333 void rf_CopybackThread(RF_Raid_t *raidPtr);
334 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
335 int rf_autoconfig(device_t);
336 void rf_buildroothack(RF_ConfigSet_t *);
337
338 RF_AutoConfig_t *rf_find_raid_components(void);
339 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
340 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
341 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
342 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
343 int rf_set_autoconfig(RF_Raid_t *, int);
344 int rf_set_rootpartition(RF_Raid_t *, int);
345 void rf_release_all_vps(RF_ConfigSet_t *);
346 void rf_cleanup_config_set(RF_ConfigSet_t *);
347 int rf_have_enough_components(RF_ConfigSet_t *);
348 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
349 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
350
351 /*
352 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
353 * Note that this is overridden by having RAID_AUTOCONFIG as an option
354 * in the kernel config file.
355 */
356 #ifdef RAID_AUTOCONFIG
357 int raidautoconfig = 1;
358 #else
359 int raidautoconfig = 0;
360 #endif
361 static bool raidautoconfigdone = false;
362
363 struct RF_Pools_s rf_pools;
364
365 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
366 static kmutex_t raid_lock;
367
368 static struct raid_softc *
369 raidcreate(int unit) {
370 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
371 sc->sc_unit = unit;
372 cv_init(&sc->sc_cv, "raidunit");
373 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
374 return sc;
375 }
376
377 static void
378 raiddestroy(struct raid_softc *sc) {
379 cv_destroy(&sc->sc_cv);
380 mutex_destroy(&sc->sc_mutex);
381 kmem_free(sc, sizeof(*sc));
382 }
383
384 static struct raid_softc *
385 raidget(int unit, bool create) {
386 struct raid_softc *sc;
387 if (unit < 0) {
388 #ifdef DIAGNOSTIC
389 panic("%s: unit %d!", __func__, unit);
390 #endif
391 return NULL;
392 }
393 mutex_enter(&raid_lock);
394 LIST_FOREACH(sc, &raids, sc_link) {
395 if (sc->sc_unit == unit) {
396 mutex_exit(&raid_lock);
397 return sc;
398 }
399 }
400 mutex_exit(&raid_lock);
401 if (!create)
402 return NULL;
403 if ((sc = raidcreate(unit)) == NULL)
404 return NULL;
405 mutex_enter(&raid_lock);
406 LIST_INSERT_HEAD(&raids, sc, sc_link);
407 mutex_exit(&raid_lock);
408 return sc;
409 }
410
/*
 * Unlink a softc from the global list and free it.  The caller must
 * guarantee no other references to the unit remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
418
/*
 * Legacy pseudo-device attach entry point.  Intentionally empty.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
428
/*
 * Probe all disks for RAIDframe component labels and auto-configure
 * any complete sets found.  Runs at most once per boot (guarded by
 * raidautoconfigdone).  Returns 1 when a search was performed, 0 when
 * autoconfiguration is disabled or was already done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
466
467 static int
468 rf_containsboot(RF_Raid_t *r, device_t bdv) {
469 const char *bootname = device_xname(bdv);
470 size_t len = strlen(bootname);
471
472 for (int col = 0; col < r->numCol; col++) {
473 const char *devname = r->Disks[col].devname;
474 devname += sizeof("/dev/") - 1;
475 if (strncmp(devname, "dk", 2) == 0) {
476 const char *parent =
477 dkwedge_get_parent_name(r->Disks[col].dev);
478 if (parent != NULL)
479 devname = parent;
480 }
481 if (strncmp(devname, bootname, len) == 0) {
482 struct raid_softc *sc = r->softc;
483 aprint_debug("raid%d includes boot device %s\n",
484 sc->sc_unit, devname);
485 return 1;
486 }
487 }
488 return 0;
489 }
490
491 void
492 rf_buildroothack(RF_ConfigSet_t *config_sets)
493 {
494 RF_ConfigSet_t *cset;
495 RF_ConfigSet_t *next_cset;
496 int num_root;
497 struct raid_softc *sc, *rsc;
498 struct dk_softc *dksc;
499
500 sc = rsc = NULL;
501 num_root = 0;
502 cset = config_sets;
503 while (cset != NULL) {
504 next_cset = cset->next;
505 if (rf_have_enough_components(cset) &&
506 cset->ac->clabel->autoconfigure == 1) {
507 sc = rf_auto_config_set(cset);
508 if (sc != NULL) {
509 aprint_debug("raid%d: configured ok\n",
510 sc->sc_unit);
511 if (cset->rootable) {
512 rsc = sc;
513 num_root++;
514 }
515 } else {
516 /* The autoconfig didn't work :( */
517 aprint_debug("Autoconfig failed\n");
518 rf_release_all_vps(cset);
519 }
520 } else {
521 /* we're not autoconfiguring this set...
522 release the associated resources */
523 rf_release_all_vps(cset);
524 }
525 /* cleanup */
526 rf_cleanup_config_set(cset);
527 cset = next_cset;
528 }
529 dksc = &rsc->sc_dksc;
530
531 /* if the user has specified what the root device should be
532 then we don't touch booted_device or boothowto... */
533
534 if (rootspec != NULL)
535 return;
536
537 /* we found something bootable... */
538
539 /*
540 * XXX: The following code assumes that the root raid
541 * is the first ('a') partition. This is about the best
542 * we can do with a BSD disklabel, but we might be able
543 * to do better with a GPT label, by setting a specified
544 * attribute to indicate the root partition. We can then
545 * stash the partition number in the r->root_partition
546 * high bits (the bottom 2 bits are already used). For
547 * now we just set booted_partition to 0 when we override
548 * root.
549 */
550 if (num_root == 1) {
551 device_t candidate_root;
552 if (dksc->sc_dkdev.dk_nwedges != 0) {
553 char cname[sizeof(cset->ac->devname)];
554 /* XXX: assume partition 'a' first */
555 snprintf(cname, sizeof(cname), "%s%c",
556 device_xname(dksc->sc_dev), 'a');
557 candidate_root = dkwedge_find_by_wname(cname);
558 DPRINTF("%s: candidate wedge root=%s\n", __func__,
559 cname);
560 if (candidate_root == NULL) {
561 /*
562 * If that is not found, because we don't use
563 * disklabel, return the first dk child
564 * XXX: we can skip the 'a' check above
565 * and always do this...
566 */
567 size_t i = 0;
568 candidate_root = dkwedge_find_by_parent(
569 device_xname(dksc->sc_dev), &i);
570 }
571 DPRINTF("%s: candidate wedge root=%p\n", __func__,
572 candidate_root);
573 } else
574 candidate_root = dksc->sc_dev;
575 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
576 DPRINTF("%s: booted_device=%p root_partition=%d "
577 "contains_boot=%d\n", __func__, booted_device,
578 rsc->sc_r.root_partition,
579 rf_containsboot(&rsc->sc_r, booted_device));
580 if (booted_device == NULL ||
581 rsc->sc_r.root_partition == 1 ||
582 rf_containsboot(&rsc->sc_r, booted_device)) {
583 booted_device = candidate_root;
584 booted_method = "raidframe/single";
585 booted_partition = 0; /* XXX assume 'a' */
586 }
587 } else if (num_root > 1) {
588 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
589 booted_device);
590
591 /*
592 * Maybe the MD code can help. If it cannot, then
593 * setroot() will discover that we have no
594 * booted_device and will ask the user if nothing was
595 * hardwired in the kernel config file
596 */
597 if (booted_device == NULL)
598 return;
599
600 num_root = 0;
601 mutex_enter(&raid_lock);
602 LIST_FOREACH(sc, &raids, sc_link) {
603 RF_Raid_t *r = &sc->sc_r;
604 if (r->valid == 0)
605 continue;
606
607 if (r->root_partition == 0)
608 continue;
609
610 if (rf_containsboot(r, booted_device)) {
611 num_root++;
612 rsc = sc;
613 dksc = &rsc->sc_dksc;
614 }
615 }
616 mutex_exit(&raid_lock);
617
618 if (num_root == 1) {
619 booted_device = dksc->sc_dev;
620 booted_method = "raidframe/multi";
621 booted_partition = 0; /* XXX assume 'a' */
622 } else {
623 /* we can't guess.. require the user to answer... */
624 boothowto |= RB_ASKNAME;
625 }
626 }
627 }
628
629 static int
630 raidsize(dev_t dev)
631 {
632 struct raid_softc *rs;
633 struct dk_softc *dksc;
634 unsigned int unit;
635
636 unit = raidunit(dev);
637 if ((rs = raidget(unit, false)) == NULL)
638 return -1;
639 dksc = &rs->sc_dksc;
640
641 if ((rs->sc_flags & RAIDF_INITED) == 0)
642 return -1;
643
644 return dk_size(dksc, dev);
645 }
646
647 static int
648 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
649 {
650 unsigned int unit;
651 struct raid_softc *rs;
652 struct dk_softc *dksc;
653
654 unit = raidunit(dev);
655 if ((rs = raidget(unit, false)) == NULL)
656 return ENXIO;
657 dksc = &rs->sc_dksc;
658
659 if ((rs->sc_flags & RAIDF_INITED) == 0)
660 return ENODEV;
661
662 /*
663 Note that blkno is relative to this particular partition.
664 By adding adding RF_PROTECTED_SECTORS, we get a value that
665 is relative to the partition used for the underlying component.
666 */
667 blkno += RF_PROTECTED_SECTORS;
668
669 return dk_dump(dksc, dev, blkno, va, size);
670 }
671
/*
 * dkdriver d_dumpblocks callback: write nblk blocks of crash-dump
 * data at blkno directly to a live component of the set.  Only
 * supported for RAID 1 (one data + one parity column), where a single
 * component holds a complete copy of the data.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	/* First choice: any optimal (live) column, lowest index first. */
	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column this
			   spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Hand the dump off to the chosen component's own d_dump. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
777
778 /* ARGSUSED */
779 static int
780 raidopen(dev_t dev, int flags, int fmt,
781 struct lwp *l)
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785 struct dk_softc *dksc;
786 int error = 0;
787 int part, pmask;
788
789 if ((rs = raidget(unit, true)) == NULL)
790 return ENXIO;
791 if ((error = raidlock(rs)) != 0)
792 return (error);
793
794 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
795 error = EBUSY;
796 goto bad;
797 }
798
799 dksc = &rs->sc_dksc;
800
801 part = DISKPART(dev);
802 pmask = (1 << part);
803
804 if (!DK_BUSY(dksc, pmask) &&
805 ((rs->sc_flags & RAIDF_INITED) != 0)) {
806 /* First one... mark things as dirty... Note that we *MUST*
807 have done a configure before this. I DO NOT WANT TO BE
808 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
809 THAT THEY BELONG TOGETHER!!!!! */
810 /* XXX should check to see if we're only open for reading
811 here... If so, we needn't do this, but then need some
812 other way of keeping track of what's happened.. */
813
814 rf_markalldirty(&rs->sc_r);
815 }
816
817 if ((rs->sc_flags & RAIDF_INITED) != 0)
818 error = dk_open(dksc, dev, flags, fmt, l);
819
820 bad:
821 raidunlock(rs);
822
823 return (error);
824
825
826 }
827
828 static int
829 raid_lastclose(device_t self)
830 {
831 struct raid_softc *rs = raidsoftc(self);
832
833 /* Last one... device is not unconfigured yet.
834 Device shutdown has taken care of setting the
835 clean bits if RAIDF_INITED is not set
836 mark things as clean... */
837
838 rf_update_component_labels(&rs->sc_r,
839 RF_FINAL_COMPONENT_UPDATE);
840
841 /* pass to unlocked code */
842 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
843 rs->sc_flags |= RAIDF_DETACH;
844
845 return 0;
846 }
847
/*
 * Close the raid device.  When this drops the last reference and a
 * detach/shutdown was requested, finish the teardown here, outside
 * the unit lock.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() sets RAIDF_DETACH when we must detach */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		/* never configured: just drop the softc */
		raidput(rs);
	}

	return (error);

}
887
/*
 * Poke the RAIDframe I/O thread: signal iodone_cv (under its lock) so
 * queued work gets picked up.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
895
896 static void
897 raidstrategy(struct buf *bp)
898 {
899 unsigned int unit;
900 struct raid_softc *rs;
901 struct dk_softc *dksc;
902 RF_Raid_t *raidPtr;
903
904 unit = raidunit(bp->b_dev);
905 if ((rs = raidget(unit, false)) == NULL) {
906 bp->b_error = ENXIO;
907 goto fail;
908 }
909 if ((rs->sc_flags & RAIDF_INITED) == 0) {
910 bp->b_error = ENXIO;
911 goto fail;
912 }
913 dksc = &rs->sc_dksc;
914 raidPtr = &rs->sc_r;
915
916 /* Queue IO only */
917 if (dk_strategy_defer(dksc, bp))
918 goto done;
919
920 /* schedule the IO to happen at the next convenient time */
921 raid_wakeup(raidPtr);
922
923 done:
924 return;
925
926 fail:
927 bp->b_resid = bp->b_bcount;
928 biodone(bp);
929 }
930
931 static int
932 raid_diskstart(device_t dev, struct buf *bp)
933 {
934 struct raid_softc *rs = raidsoftc(dev);
935 RF_Raid_t *raidPtr;
936
937 raidPtr = &rs->sc_r;
938 if (!raidPtr->valid) {
939 db1_printf(("raid is not valid..\n"));
940 return ENODEV;
941 }
942
943 /* XXX */
944 bp->b_resid = 0;
945
946 return raiddoaccess(raidPtr, bp);
947 }
948
949 void
950 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
951 {
952 struct raid_softc *rs;
953 struct dk_softc *dksc;
954
955 rs = raidPtr->softc;
956 dksc = &rs->sc_dksc;
957
958 dk_done(dksc, bp);
959
960 rf_lock_mutex2(raidPtr->mutex);
961 raidPtr->openings++;
962 rf_unlock_mutex2(raidPtr->mutex);
963
964 /* schedule more IO */
965 raid_wakeup(raidPtr);
966 }
967
968 /* ARGSUSED */
969 static int
970 raidread(dev_t dev, struct uio *uio, int flags)
971 {
972 int unit = raidunit(dev);
973 struct raid_softc *rs;
974
975 if ((rs = raidget(unit, false)) == NULL)
976 return ENXIO;
977
978 if ((rs->sc_flags & RAIDF_INITED) == 0)
979 return (ENXIO);
980
981 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
982
983 }
984
985 /* ARGSUSED */
986 static int
987 raidwrite(dev_t dev, struct uio *uio, int flags)
988 {
989 int unit = raidunit(dev);
990 struct raid_softc *rs;
991
992 if ((rs = raidget(unit, false)) == NULL)
993 return ENXIO;
994
995 if ((rs->sc_flags & RAIDF_INITED) == 0)
996 return (ENXIO);
997
998 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
999
1000 }
1001
/*
 * Tear down a raid unit.  Caller holds the unit lock (raidlock).
 * Refuses (EBUSY) while the device is open or a reconstruction,
 * parity rewrite, or copyback is in progress.  Returns 0 on success
 * or when the unit was never configured.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	/* Shut down the RAIDframe engine first; abort on failure. */
	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1039
/*
 * Hooks to call the 5.0 and 8.0 ioctl compat code.  When the matching
 * compat module is not loaded, the hook call resolves to enosys() and
 * raidioctl() falls through to native cmd handling.
 */
MODULE_CALL_HOOK_DECL(raidframe50_ioctl_hook, int,
    (int cmd, int initted, RF_Raid_t *raidPtr, int unit, void *data,
    RF_Config_t **k_cfg));
MODULE_CALL_HOOK(raidframe50_ioctl_hook, int,
    (int cmd, int initted, RF_Raid_t *raidPtr, int unit, void *data,
    RF_Config_t **k_cfg),
    (cmd, initted, raidPtr, unit, data, k_cfg),
    enosys());

MODULE_CALL_HOOK_DECL(raidframe80_ioctl_hook, int,
    (int cmd, int initted, RF_Raid_t *raidPtr, int unit, void *data,
    RF_Config_t **k_cfg));
MODULE_CALL_HOOK(raidframe80_ioctl_hook, int,
    (int cmd, int initted, RF_Raid_t *raidPtr, int unit, void *data,
    RF_Config_t **k_cfg),
    (cmd, initted, raidPtr, unit, data, k_cfg),
    enosys());
1058
/*
 * raidioctl(dev, cmd, data, flag, l):
 *	ioctl entry point for raid(4) devices.
 *
 *	Commands are first offered to the (optionally loaded) NetBSD 5.0
 *	and 8.0 compat modules via the hooks declared above, then handled
 *	by the big switch below.  Anything still unrecognized after that
 *	falls through to the generic dk_ioctl() at the bottom.
 *
 *	Returns 0 on success or an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	/*
	 * Handle compat ioctl calls
	 *
	 * * If compat code is not loaded, stub returns ENOSYS and we just
	 *   check the "native" cmd's
	 * * If compat code is loaded but does not recognize the cmd, it
	 *   returns EPASSTHROUGH, and we just check the "native" cmd's
	 * * If compat code returns EAGAIN, we need to finish via config
	 * * Otherwise the cmd has been handled and we just return
	 */
	retcode = raidframe50_ioctl_hook_call(cmd,
	    (rs->sc_flags & RAIDF_INITED), raidPtr, unit, data, &k_cfg);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;
	else if (retcode != EPASSTHROUGH)
		return retcode;

	retcode = raidframe80_ioctl_hook_call(cmd,
	    (rs->sc_flags & RAIDF_INITED), raidPtr, unit, data, &k_cfg);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;
	else if (retcode != EPASSTHROUGH)
		return retcode;

	/*
	 * XXX
	 * Handling of FAIL_DISK80 command requires us to retain retcode's
	 * value of EPASSTHROUGH.  If you add more compat code later, make
	 * sure you don't overwrite retcode and break this!
	 */

	switch (cmd) {

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_CONFIGURE32:
#endif
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef COMPAT_NETBSD32
#ifdef _LP64
		/* 32-bit process on a 64-bit kernel: translate the config. */
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			retcode = rf_config_netbsd32(data, k_cfg);
		else
#endif
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/*
		 * Reached either by falling through from the copyin above,
		 * or via "goto config" when a compat hook returned EAGAIN
		 * (in which case the hook already filled in k_cfg).
		 */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse while open or while any background op is running. */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* Rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; returns the (zero) retcode. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* Request struct is consumed (and freed) by the recon
		 * thread. */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
#ifdef COMPAT_NETBSD32
#ifdef _LP64
			if (cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK80:
		/* Check if we called compat code for this cmd */
		if (retcode != EPASSTHROUGH)
			return EINVAL;
		/* FALLTHRU */
	case RAIDFRAME_FAIL_DISK:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		/*
		 * XXX(review): retcode from RF_CREATE_THREAD is ignored
		 * here (unlike REBUILD_IN_PLACE above); a failed thread
		 * create would be reported as success -- confirm intent.
		 */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* Only allowed when every component is optimal. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* XXX(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 -- harmless while under #if 0, but
		 * would not compile if re-enabled. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		/* Anything else goes to the common disk driver layer. */
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1801
1802
/*
 * raidinit -- complete the rest of the initialization for the
 * RAIDframe device: attach the raid(4) pseudo-device, hook it up to the
 * dk(9)/disk(9) layers, and mark the unit usable.
 *
 * Called from raidioctl() after a successful rf_Configure().  On
 * config_attach_pseudo() failure the unit is simply left unattached
 * (the function returns void, so the caller is not informed).
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe for wedges (dk partitions) on the new device. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1862
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Hand the request to the daemon via the wait queue... */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* ...then block until a response shows up on the response queue.
	 * (The condvar wait drops and reacquires the mutex.) */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon reports its status in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif
1897
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Label updates must happen without the raid mutex held,
		 * hence the unlock/relock dance around the call. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* Nothing to start if the unit was never fully configured. */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* Kick the dk(9) layer to (re)start queued I/O. */
	dk_start(dksc, NULL);
}
1932
/*
 * raiddoaccess: submit one buf worth of I/O to RAIDframe.
 *
 * Validates the request range, consumes one "opening" (concurrent-I/O
 * slot), and hands the work to rf_DoAccess() as a non-blocking access.
 *
 * Returns 0 on successful submission, EAGAIN when no openings are
 * available (caller should retry later), or ENOSPC for out-of-range or
 * non-sector-multiple requests.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int     do_async;
	int rc;

	/* No free openings: back off and let dk_start() retry. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb: one extra sector if the byte count isn't sector-aligned. */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NB: "1 ||" forces this debug path on unconditionally. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Range check; the "sum < ..." comparisons also catch wraparound. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume an opening; raid_done()/callers return it later. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
2005
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * rf_DispatchKernelIO: issue one queued request (read, write, or NOP)
 * to the underlying component device.
 *
 * Called with the disk queue mutex held.  For real I/O the queue mutex
 * is temporarily dropped around bdev_strategy(), which may block.
 * Completion is delivered asynchronously via KernelWakeupFunc().
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Fake an immediate completion. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf to target the component's device/vnode;
		 * KernelWakeupFunc() is the biodone callback. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/*
 * KernelWakeupFunc: biodone callback for component I/O issued by
 * rf_DispatchKernelIO().  Records timing stats, marks the component
 * failed on I/O error (when that wouldn't break the set entirely),
 * queues the request on the raid's iodone list, and signals the raidio
 * thread.  Runs in biodone context; takes only iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d).  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update in
			 * raidstart(). */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2150
2151
2152 /*
2153 * initialize a buf structure for doing an I/O in the kernel.
2154 */
2155 static void
2156 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2157 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2158 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2159 struct proc *b_proc)
2160 {
2161 /* bp->b_flags = B_PHYS | rw_flag; */
2162 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2163 bp->b_oflags = 0;
2164 bp->b_cflags = 0;
2165 bp->b_bcount = numSect << logBytesPerSector;
2166 bp->b_bufsize = bp->b_bcount;
2167 bp->b_error = 0;
2168 bp->b_dev = dev;
2169 bp->b_data = bf;
2170 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2171 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2172 if (bp->b_bcount == 0) {
2173 panic("bp->b_bcount is zero in InitBP!!");
2174 }
2175 bp->b_proc = b_proc;
2176 bp->b_iodone = cbFunc;
2177 bp->b_private = cbArg;
2178 }
2179
2180 /*
2181 * Wait interruptibly for an exclusive lock.
2182 *
2183 * XXX
2184 * Several drivers do this; it should be abstracted and made MP-safe.
2185 * (Hmm... where have we seen this warning before :-> GO )
2186 */
2187 static int
2188 raidlock(struct raid_softc *rs)
2189 {
2190 int error;
2191
2192 error = 0;
2193 mutex_enter(&rs->sc_mutex);
2194 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2195 rs->sc_flags |= RAIDF_WANTED;
2196 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2197 if (error != 0)
2198 goto done;
2199 }
2200 rs->sc_flags |= RAIDF_LOCKED;
2201 done:
2202 mutex_exit(&rs->sc_mutex);
2203 return (error);
2204 }
2205 /*
2206 * Unlock and wake up any waiters.
2207 */
2208 static void
2209 raidunlock(struct raid_softc *rs)
2210 {
2211
2212 mutex_enter(&rs->sc_mutex);
2213 rs->sc_flags &= ~RAIDF_LOCKED;
2214 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2215 rs->sc_flags &= ~RAIDF_WANTED;
2216 cv_broadcast(&rs->sc_cv);
2217 }
2218 mutex_exit(&rs->sc_mutex);
2219 }
2220
2221
/* On-disk layout of the per-component metadata area. */
#define RF_COMPONENT_INFO_OFFSET   16384   /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024    /* bytes */
#define RF_PARITY_MAP_SIZE         RF_PARITYMAP_NBYTE

/*
 * Byte offset of the component label area on each component.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2232
2233 static daddr_t
2234 rf_component_info_size(unsigned secsize)
2235 {
2236 daddr_t info_size;
2237
2238 KASSERT(secsize);
2239 if (secsize > RF_COMPONENT_INFO_SIZE)
2240 info_size = secsize;
2241 else
2242 info_size = RF_COMPONENT_INFO_SIZE;
2243
2244 return info_size;
2245 }
2246
2247 static daddr_t
2248 rf_parity_map_offset(RF_Raid_t *raidPtr)
2249 {
2250 daddr_t map_offset;
2251
2252 KASSERT(raidPtr->bytesPerSector);
2253 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2254 map_offset = raidPtr->bytesPerSector;
2255 else
2256 map_offset = RF_COMPONENT_INFO_SIZE;
2257 map_offset += rf_component_info_offset();
2258
2259 return map_offset;
2260 }
2261
2262 static daddr_t
2263 rf_parity_map_size(RF_Raid_t *raidPtr)
2264 {
2265 daddr_t map_size;
2266
2267 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2268 map_size = raidPtr->bytesPerSector;
2269 else
2270 map_size = RF_PARITY_MAP_SIZE;
2271
2272 return map_size;
2273 }
2274
/*
 * Mark component `col' as clean in its in-core label and flush the
 * label to disk.  Always returns 0.
 */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2285
2286
/*
 * Mark component `col' as dirty in its in-core label and flush the
 * label to disk.  Always returns 0.
 */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2297
/*
 * Read the on-disk component label for column `col' into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns what
 * raidread_component_label() returns.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2307
/*
 * Return a pointer to the in-core component label for column `col'.
 * The label remains owned by raidPtr; callers must not free it.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2313
/*
 * Write the in-core component label for column `col' back to disk,
 * stamping it with the set's current mod_counter first (and, when
 * parity maps are enabled, keeping parity_map_modcount in sync).
 * Returns what raidwrite_component_label() returns.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2328
2329
/*
 * Read a component label from the standard label area of the given
 * device into *clabel.  Thin wrapper around raidread_component_area()
 * with the label offset/size for this sector size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2339
/* ARGSUSED */
/*
 * Read `dsize' bytes of raw metadata starting at byte `offset' of `dev',
 * copying the first `msize' bytes of the result into `data'.
 *
 * Returns 0 on success, EINVAL if the component has no vnode, or the
 * error from biowait().  Uses a temporary buffer from geteblk(), which
 * is released before returning.
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* Issue the read synchronously and wait for it. */
	bdev_strategy(bp);
	error = biowait(bp);

	/* Only the first msize bytes are meaningful to the caller. */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2377
2378
/*
 * Write a component label to the standard label area of the given
 * device.  Thin wrapper around raidwrite_component_area() (synchronous;
 * asyncp = 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2388
/* ARGSUSED */
/*
 * Write `msize' bytes from `data' (zero-padded out to `dsize') at byte
 * `offset' of `dev'.  If `asyncp' is set the write is issued B_ASYNC
 * and this returns 0 immediately without waiting; otherwise it waits
 * for completion and returns the biowait() error.
 *
 * NOTE(review): on the async path the buffer from geteblk() is not
 * released here -- presumably async completion takes care of it, but
 * confirm against the buffercache(9) B_ASYNC semantics.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* Zero-fill so the tail beyond msize is deterministic on disk. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2423
/*
 * Write the on-disk parity map `map' to every live component of the
 * set (dead disks are skipped).  Writes are synchronous and errors are
 * currently ignored (see XXX below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2441
/*
 * Read the parity map from every live component and combine them into
 * *map: the first copy read is taken as-is, subsequent copies are
 * merged in via rf_paritymap_merge().  Dead disks are skipped.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* First live copy seeds the result. */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			/* Later copies are merged into it. */
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2466
/*
 * Bump the set's mod_counter and mark every usable component dirty:
 * all non-dead, non-spared data columns, plus any in-use spares (whose
 * labels are re-initialized to reflect the column they now stand in
 * for).  Used to note that the set is in use and parity may go stale.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2526
2527
/*
 * Bump mod_counter and rewrite the component labels of all optimal
 * columns and all in-use spares.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark each
 * such component clean.  Spare labels are re-initialized to identify
 * the column they replaced and are stamped rf_ds_optimal.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as, unless the
			   unit was changed since configuration */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2605
/*
 * Close a component's vnode.  Auto-configured components were opened
 * with VOP_OPEN and a held reference, so they need an explicit
 * lock/VOP_CLOSE/vput; manually-configured ones were opened via the
 * vn_open path and are closed with vn_close using the caller's creds.
 * A NULL vp is silently ignored.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}
2621
2622
/*
 * Close and forget the vnodes of every component and spare in the set,
 * clearing each ci_vp and auto_configured flag as we go.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should.. */

	/* Data columns first... */
	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	/* ...then the spares, which live past numCol. */
	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}
2649
2650
/*
 * Kernel-thread body: fail the requested component (optionally starting
 * reconstruction, per RF_FDFLAGS_RECON), maintaining the
 * recon_in_progress flag around the work.  Frees `req' and exits the
 * thread; never returns.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* The request was allocated by our creator; we own and free it. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2672
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * set is marked RF_RAID_CLEAN (so a proper shutdown will mark the
 * component labels clean); on failure the error is logged.  Wakes any
 * shutdown waiter on parity_rewrite_cv, then exits; never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2705
2706
/*
 * Kernel-thread body: copy reconstructed data from in-use spares back
 * to their original components, maintaining copyback_in_progress around
 * the work.  Exits the thread; never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2721
2722
/*
 * Kernel-thread body: reconstruct component req->col in place,
 * maintaining recon_in_progress around the work.  Frees `req' and
 * exits the thread; never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* The request was allocated by our creator; we own and free it. */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2740
/*
 * Try to read a RAIDframe component label from the open device (dev/vp).
 * If a reasonable label is found and its recorded partition size fits
 * within `size', prepend a new RF_AutoConfig_t to ac_list (which takes
 * over the vnode reference and the label allocation) and return the new
 * list head.  Otherwise free the label, close/release the vnode, and
 * return ac_list unchanged.  On allocation failure the entire ac_list
 * is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: dismantle everything collected so far. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL;		/* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no entry was created, so release both the
		   label memory and the vnode reference here. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2798
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t entries for all plausible
 * component labels found.
 *
 * The scan is made in two passes: wedges (dk) first, then everything
 * else, so that a wedge covering a whole disk is preferred over that
 * disk's raw partition.  For non-wedge disks each FS_RAID-typed
 * disklabel partition is probed; if none is found, the raw partition
 * itself is probed as a last resort.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			/* FSILENT: don't log open failures for absent units */
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: probe only wedges whose
				   partition type is raidframe. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes over vp */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3002
3003
3004 int
3005 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3006 {
3007
3008 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3009 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3010 ((clabel->clean == RF_RAID_CLEAN) ||
3011 (clabel->clean == RF_RAID_DIRTY)) &&
3012 clabel->row >=0 &&
3013 clabel->column >= 0 &&
3014 clabel->num_rows > 0 &&
3015 clabel->num_columns > 0 &&
3016 clabel->row < clabel->num_rows &&
3017 clabel->column < clabel->num_columns &&
3018 clabel->blockSize > 0 &&
3019 /*
3020 * numBlocksHi may contain garbage, but it is ok since
3021 * the type is unsigned. If it is really garbage,
3022 * rf_fix_old_label_size() will fix it.
3023 */
3024 rf_component_label_numblocks(clabel) > 0) {
3025 /*
3026 * label looks reasonable enough...
3027 * let's make sure it has no old garbage.
3028 */
3029 if (numsecs)
3030 rf_fix_old_label_size(clabel, numsecs);
3031 return(1);
3032 }
3033 return(0);
3034 }
3035
3036
3037 /*
3038 * For reasons yet unknown, some old component labels have garbage in
3039 * the newer numBlocksHi region, and this causes lossage. Since those
3040 * disks will also have numsecs set to less than 32 bits of sectors,
3041 * we can determine when this corruption has occurred, and fix it.
3042 *
3043 * The exact same problem, with the same unknown reason, happens to
3044 * the partitionSizeHi member as well.
3045 */
3046 static void
3047 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3048 {
3049
3050 if (numsecs < ((uint64_t)1 << 32)) {
3051 if (clabel->numBlocksHi) {
3052 printf("WARNING: total sectors < 32 bits, yet "
3053 "numBlocksHi set\n"
3054 "WARNING: resetting numBlocksHi to zero.\n");
3055 clabel->numBlocksHi = 0;
3056 }
3057
3058 if (clabel->partitionSizeHi) {
3059 printf("WARNING: total sectors < 32 bits, yet "
3060 "partitionSizeHi set\n"
3061 "WARNING: resetting partitionSizeHi to zero.\n");
3062 clabel->partitionSizeHi = 0;
3063 }
3064 }
3065 }
3066
3067
#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* root_partition is masked to 2 bits below, hence 4 entries. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3101
3102 RF_ConfigSet_t *
3103 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3104 {
3105 RF_AutoConfig_t *ac;
3106 RF_ConfigSet_t *config_sets;
3107 RF_ConfigSet_t *cset;
3108 RF_AutoConfig_t *ac_next;
3109
3110
3111 config_sets = NULL;
3112
3113 /* Go through the AutoConfig list, and figure out which components
3114 belong to what sets. */
3115 ac = ac_list;
3116 while(ac!=NULL) {
3117 /* we're going to putz with ac->next, so save it here
3118 for use at the end of the loop */
3119 ac_next = ac->next;
3120
3121 if (config_sets == NULL) {
3122 /* will need at least this one... */
3123 config_sets = (RF_ConfigSet_t *)
3124 malloc(sizeof(RF_ConfigSet_t),
3125 M_RAIDFRAME, M_NOWAIT);
3126 if (config_sets == NULL) {
3127 panic("rf_create_auto_sets: No memory!");
3128 }
3129 /* this one is easy :) */
3130 config_sets->ac = ac;
3131 config_sets->next = NULL;
3132 config_sets->rootable = 0;
3133 ac->next = NULL;
3134 } else {
3135 /* which set does this component fit into? */
3136 cset = config_sets;
3137 while(cset!=NULL) {
3138 if (rf_does_it_fit(cset, ac)) {
3139 /* looks like it matches... */
3140 ac->next = cset->ac;
3141 cset->ac = ac;
3142 break;
3143 }
3144 cset = cset->next;
3145 }
3146 if (cset==NULL) {
3147 /* didn't find a match above... new set..*/
3148 cset = (RF_ConfigSet_t *)
3149 malloc(sizeof(RF_ConfigSet_t),
3150 M_RAIDFRAME, M_NOWAIT);
3151 if (cset == NULL) {
3152 panic("rf_create_auto_sets: No memory!");
3153 }
3154 cset->ac = ac;
3155 ac->next = NULL;
3156 cset->next = config_sets;
3157 cset->rootable = 0;
3158 config_sets = cset;
3159 }
3160 }
3161 ac = ac_next;
3162 }
3163
3164
3165 return(config_sets);
3166 }
3167
3168 static int
3169 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3170 {
3171 RF_ComponentLabel_t *clabel1, *clabel2;
3172
3173 /* If this one matches the *first* one in the set, that's good
3174 enough, since the other members of the set would have been
3175 through here too... */
3176 /* note that we are not checking partitionSize here..
3177
3178 Note that we are also not checking the mod_counters here.
3179 If everything else matches except the mod_counter, that's
3180 good enough for this test. We will deal with the mod_counters
3181 a little later in the autoconfiguration process.
3182
3183 (clabel1->mod_counter == clabel2->mod_counter) &&
3184
3185 The reason we don't check for this is that failed disks
3186 will have lower modification counts. If those disks are
3187 not added to the set they used to belong to, then they will
3188 form their own set, which may result in 2 different sets,
3189 for example, competing to be configured at raid0, and
3190 perhaps competing to be the root filesystem set. If the
3191 wrong ones get configured, or both attempt to become /,
3192 weird behaviour and or serious lossage will occur. Thus we
3193 need to bring them into the fold here, and kick them out at
3194 a later point.
3195
3196 */
3197
3198 clabel1 = cset->ac->clabel;
3199 clabel2 = ac->clabel;
3200 if ((clabel1->version == clabel2->version) &&
3201 (clabel1->serial_number == clabel2->serial_number) &&
3202 (clabel1->num_rows == clabel2->num_rows) &&
3203 (clabel1->num_columns == clabel2->num_columns) &&
3204 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3205 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3206 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3207 (clabel1->parityConfig == clabel2->parityConfig) &&
3208 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3209 (clabel1->blockSize == clabel2->blockSize) &&
3210 rf_component_label_numblocks(clabel1) ==
3211 rf_component_label_numblocks(clabel2) &&
3212 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3213 (clabel1->root_partition == clabel2->root_partition) &&
3214 (clabel1->last_unit == clabel2->last_unit) &&
3215 (clabel1->config_order == clabel2->config_order)) {
3216 /* if it get's here, it almost *has* to be a match */
3217 } else {
3218 /* it's not consistent with somebody in the set..
3219 punt */
3220 return(0);
3221 }
3222 /* all was fine.. it must fit... */
3223 return(1);
3224 }
3225
/*
 * Decide whether a configuration set has enough live components (those
 * carrying the set's highest mod_counter) to be configured.  Returns 1
 * if so, 0 if too many components are missing.  RAID 1 gets special
 * pair-wise accounting; RAID 0 tolerates no missing components, RAID
 * 4/5 tolerate one.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest value seen wins: stale (failed) components carry
	   lower counters. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component with a current label. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3328
3329 void
3330 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3331 RF_Raid_t *raidPtr)
3332 {
3333 RF_ComponentLabel_t *clabel;
3334 int i;
3335
3336 clabel = ac->clabel;
3337
3338 /* 1. Fill in the common stuff */
3339 config->numCol = clabel->num_columns;
3340 config->numSpare = 0; /* XXX should this be set here? */
3341 config->sectPerSU = clabel->sectPerSU;
3342 config->SUsPerPU = clabel->SUsPerPU;
3343 config->SUsPerRU = clabel->SUsPerRU;
3344 config->parityConfig = clabel->parityConfig;
3345 /* XXX... */
3346 strcpy(config->diskQueueType,"fifo");
3347 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3348 config->layoutSpecificSize = 0; /* XXX ?? */
3349
3350 while(ac!=NULL) {
3351 /* row/col values will be in range due to the checks
3352 in reasonable_label() */
3353 strcpy(config->devnames[0][ac->clabel->column],
3354 ac->devname);
3355 ac = ac->next;
3356 }
3357
3358 for(i=0;i<RF_MAXDBGV;i++) {
3359 config->debugVars[i][0] = 0;
3360 }
3361 }
3362
3363 int
3364 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3365 {
3366 RF_ComponentLabel_t *clabel;
3367 int column;
3368 int sparecol;
3369
3370 raidPtr->autoconfigure = new_value;
3371
3372 for(column=0; column<raidPtr->numCol; column++) {
3373 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3374 clabel = raidget_component_label(raidPtr, column);
3375 clabel->autoconfigure = new_value;
3376 raidflush_component_label(raidPtr, column);
3377 }
3378 }
3379 for(column = 0; column < raidPtr->numSpare ; column++) {
3380 sparecol = raidPtr->numCol + column;
3381 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3382 clabel = raidget_component_label(raidPtr, sparecol);
3383 clabel->autoconfigure = new_value;
3384 raidflush_component_label(raidPtr, sparecol);
3385 }
3386 }
3387 return(new_value);
3388 }
3389
3390 int
3391 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3392 {
3393 RF_ComponentLabel_t *clabel;
3394 int column;
3395 int sparecol;
3396
3397 raidPtr->root_partition = new_value;
3398 for(column=0; column<raidPtr->numCol; column++) {
3399 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3400 clabel = raidget_component_label(raidPtr, column);
3401 clabel->root_partition = new_value;
3402 raidflush_component_label(raidPtr, column);
3403 }
3404 }
3405 for(column = 0; column < raidPtr->numSpare ; column++) {
3406 sparecol = raidPtr->numCol + column;
3407 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3408 clabel = raidget_component_label(raidPtr, sparecol);
3409 clabel->root_partition = new_value;
3410 raidflush_component_label(raidPtr, sparecol);
3411 }
3412 }
3413 return(new_value);
3414 }
3415
3416 void
3417 rf_release_all_vps(RF_ConfigSet_t *cset)
3418 {
3419 RF_AutoConfig_t *ac;
3420
3421 ac = cset->ac;
3422 while(ac!=NULL) {
3423 /* Close the vp, and give it back */
3424 if (ac->vp) {
3425 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3426 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3427 vput(ac->vp);
3428 ac->vp = NULL;
3429 }
3430 ac = ac->next;
3431 }
3432 }
3433
3434
3435 void
3436 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3437 {
3438 RF_AutoConfig_t *ac;
3439 RF_AutoConfig_t *next_ac;
3440
3441 ac = cset->ac;
3442 while(ac!=NULL) {
3443 next_ac = ac->next;
3444 /* nuke the label */
3445 free(ac->clabel, M_RAIDFRAME);
3446 /* cleanup the config structure */
3447 free(ac, M_RAIDFRAME);
3448 /* "next.." */
3449 ac = next_ac;
3450 }
3451 /* and, finally, nuke the config set */
3452 free(cset, M_RAIDFRAME);
3453 }
3454
3455
/*
 * Initialize a component label from the current state of the RAID set.
 * The caller is responsible for filling in per-component fields (e.g.
 * column) and for writing the label to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* rows are a legacy concept; modern RAIDframe always uses 1 */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	/* remember which raidN unit this set was configured as */
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3488
/*
 * Autoconfigure a single config set: allocate a config structure, find a
 * free raid unit (preferring the unit recorded in the component labels),
 * and configure the set.  Returns the softc on success, NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/*
	 * Start at the unit saved in the label; walk upward past any
	 * units that already have a valid (configured) RAID on them.
	 */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no existing softc at this unit: create one now */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: release the unit we claimed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3572
3573 void
3574 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3575 size_t xmin, size_t xmax)
3576 {
3577 int error;
3578
3579 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3580 pool_sethiwat(p, xmax);
3581 if ((error = pool_prime(p, xmin)) != 0)
3582 panic("%s: failed to prime pool: %d", __func__, error);
3583 pool_setlowat(p, xmin);
3584 }
3585
3586 /*
3587 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3588 * to see if there is IO pending and if that IO could possibly be done
3589 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3590 * otherwise.
3591 *
3592 */
3593 int
3594 rf_buf_queue_check(RF_Raid_t *raidPtr)
3595 {
3596 struct raid_softc *rs;
3597 struct dk_softc *dksc;
3598
3599 rs = raidPtr->softc;
3600 dksc = &rs->sc_dksc;
3601
3602 if ((rs->sc_flags & RAIDF_INITED) == 0)
3603 return 1;
3604
3605 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3606 /* there is work to do */
3607 return 0;
3608 }
3609 /* default is nothing to do */
3610 return 1;
3611 }
3612
3613 int
3614 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3615 {
3616 uint64_t numsecs;
3617 unsigned secsize;
3618 int error;
3619
3620 error = getdisksize(vp, &numsecs, &secsize);
3621 if (error == 0) {
3622 diskPtr->blockSize = secsize;
3623 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3624 diskPtr->partitionSize = numsecs;
3625 return 0;
3626 }
3627 return error;
3628 }
3629
/*
 * Autoconfiguration match routine: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3635
/*
 * Autoconfiguration attach routine.  Nothing to do here; the real work
 * happens when the unit is configured via ioctl or autoconfiguration.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3640
3641
3642 static int
3643 raid_detach(device_t self, int flags)
3644 {
3645 int error;
3646 struct raid_softc *rs = raidsoftc(self);
3647
3648 if (rs == NULL)
3649 return ENXIO;
3650
3651 if ((error = raidlock(rs)) != 0)
3652 return (error);
3653
3654 error = raid_detach_unlocked(rs);
3655
3656 raidunlock(rs);
3657
3658 /* XXX raid can be referenced here */
3659
3660 if (error)
3661 return error;
3662
3663 /* Free the softc */
3664 raidput(rs);
3665
3666 return 0;
3667 }
3668
/*
 * Publish a synthetic disk geometry for the RAID set to the disk(9)
 * layer, derived from the set's layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated track count; presumably chosen for sane C/H/S math */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3684
3685 /*
3686 * Get cache info for all the components (including spares).
3687 * Returns intersection of all the cache flags of all disks, or first
3688 * error if any encountered.
3689 * XXXfua feature flags can change as spares are added - lock down somehow
3690 */
3691 static int
3692 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3693 {
3694 int c;
3695 int error;
3696 int dkwhole = 0, dkpart;
3697
3698 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3699 /*
3700 * Check any non-dead disk, even when currently being
3701 * reconstructed.
3702 */
3703 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3704 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3705 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3706 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3707 if (error) {
3708 if (error != ENODEV) {
3709 printf("raid%d: get cache for component %s failed\n",
3710 raidPtr->raidid,
3711 raidPtr->Disks[c].devname);
3712 }
3713
3714 return error;
3715 }
3716
3717 if (c == 0)
3718 dkwhole = dkpart;
3719 else
3720 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3721 }
3722 }
3723
3724 *data = dkwhole;
3725
3726 return 0;
3727 }
3728
3729 /*
3730 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3731 * We end up returning whatever error was returned by the first cache flush
3732 * that fails.
3733 */
3734
3735 int
3736 rf_sync_component_caches(RF_Raid_t *raidPtr)
3737 {
3738 int c, sparecol;
3739 int e,error;
3740 int force = 1;
3741
3742 error = 0;
3743 for (c = 0; c < raidPtr->numCol; c++) {
3744 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3745 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3746 &force, FWRITE, NOCRED);
3747 if (e) {
3748 if (e != ENODEV)
3749 printf("raid%d: cache flush to component %s failed.\n",
3750 raidPtr->raidid, raidPtr->Disks[c].devname);
3751 if (error == 0) {
3752 error = e;
3753 }
3754 }
3755 }
3756 }
3757
3758 for( c = 0; c < raidPtr->numSpare ; c++) {
3759 sparecol = raidPtr->numCol + c;
3760 /* Need to ensure that the reconstruct actually completed! */
3761 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3762 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3763 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3764 if (e) {
3765 if (e != ENODEV)
3766 printf("raid%d: cache flush to component %s failed.\n",
3767 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3768 if (error == 0) {
3769 error = e;
3770 }
3771 }
3772 }
3773 }
3774 return error;
3775 }
3776
3777 /* Fill in info with the current status */
3778 void
3779 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3780 {
3781
3782 if (raidPtr->status != rf_rs_reconstructing) {
3783 info->total = 100;
3784 info->completed = 100;
3785 } else {
3786 info->total = raidPtr->reconControl->numRUsTotal;
3787 info->completed = raidPtr->reconControl->numRUsComplete;
3788 }
3789 info->remaining = info->total - info->completed;
3790 }
3791
3792 /* Fill in info with the current status */
3793 void
3794 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3795 {
3796
3797 if (raidPtr->parity_rewrite_in_progress == 1) {
3798 info->total = raidPtr->Layout.numStripe;
3799 info->completed = raidPtr->parity_rewrite_stripes_done;
3800 } else {
3801 info->completed = 100;
3802 info->total = 100;
3803 }
3804 info->remaining = info->total - info->completed;
3805 }
3806
3807 /* Fill in info with the current status */
3808 void
3809 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3810 {
3811
3812 if (raidPtr->copyback_in_progress == 1) {
3813 info->total = raidPtr->Layout.numStripe;
3814 info->completed = raidPtr->copyback_stripes_done;
3815 info->remaining = info->total - info->completed;
3816 } else {
3817 info->remaining = 0;
3818 info->completed = 100;
3819 info->total = 100;
3820 }
3821 }
3822
3823 /* Fill in config with the current info */
3824 int
3825 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3826 {
3827 int d, i, j;
3828
3829 if (!raidPtr->valid)
3830 return (ENODEV);
3831 config->cols = raidPtr->numCol;
3832 config->ndevs = raidPtr->numCol;
3833 if (config->ndevs >= RF_MAX_DISKS)
3834 return (ENOMEM);
3835 config->nspares = raidPtr->numSpare;
3836 if (config->nspares >= RF_MAX_DISKS)
3837 return (ENOMEM);
3838 config->maxqdepth = raidPtr->maxQueueDepth;
3839 d = 0;
3840 for (j = 0; j < config->cols; j++) {
3841 config->devs[d] = raidPtr->Disks[j];
3842 d++;
3843 }
3844 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3845 config->spares[i] = raidPtr->Disks[j];
3846 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3847 /* XXX: raidctl(8) expects to see this as a used spare */
3848 config->spares[i].status = rf_ds_used_spare;
3849 }
3850 }
3851 return 0;
3852 }
3853
3854 int
3855 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3856 {
3857 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3858 RF_ComponentLabel_t *raid_clabel;
3859 int column = clabel->column;
3860
3861 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3862 return EINVAL;
3863 raid_clabel = raidget_component_label(raidPtr, column);
3864 memcpy(clabel, raid_clabel, sizeof *clabel);
3865
3866 return 0;
3867 }
3868
3869 /*
3870 * Module interface
3871 */
3872
3873 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3874
3875 #ifdef _MODULE
3876 CFDRIVER_DECL(raid, DV_DISK, NULL);
3877 #endif
3878
3879 static int raid_modcmd(modcmd_t, void *);
3880 static int raid_modcmd_init(void);
3881 static int raid_modcmd_fini(void);
3882
3883 static int
3884 raid_modcmd(modcmd_t cmd, void *data)
3885 {
3886 int error;
3887
3888 error = 0;
3889 switch (cmd) {
3890 case MODULE_CMD_INIT:
3891 error = raid_modcmd_init();
3892 break;
3893 case MODULE_CMD_FINI:
3894 error = raid_modcmd_fini();
3895 break;
3896 default:
3897 error = ENOTTY;
3898 break;
3899 }
3900 return error;
3901 }
3902
/*
 * Module initialization: set up global locks, attach the device switch
 * and autoconfiguration glue, boot RAIDframe, and register the
 * autoconfig finalizer.  Each failure path unwinds the steps already
 * taken, in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to allocate the majors dynamically */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST means the devsw is already attached (built-in case) */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* unwind: detach the devsw attached above */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* unwind: cfdriver (module case) and devsw */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 or EEXIST here; boot RAIDframe in either case */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* non-fatal: continue without autoconfiguration */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3973
/*
 * Module finalization: refuse to unload while raid devices exist, then
 * tear down the autoconfiguration glue, devsw, RAIDframe core, and the
 * global locks.  Each failure path re-attaches what was already
 * detached so the module remains usable.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: re-attach the cfattach removed above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back: re-attach cfdriver (module case) and cfattach */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* shut down the RAIDframe core */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4023