/*	$NetBSD: rf_netbsdkintf.c,v 1.376.4.5 2023/09/18 19:00:21 martin Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.376.4.5 2023/09/18 19:00:21 martin Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "ioconf.h"
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #ifdef DEBUG_ROOT
162 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
163 #else
164 #define DPRINTF(a, ...)
165 #endif
166
167 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
168 static rf_declare_mutex2(rf_sparet_wait_mutex);
169 static rf_declare_cond2(rf_sparet_wait_cv);
170 static rf_declare_cond2(rf_sparet_resp_cv);
171
172 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173 * spare table */
174 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175 * installation process */
176 #endif
177
178 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
179
180 /* prototypes */
181 static void KernelWakeupFunc(struct buf *);
182 static void InitBP(struct buf *, struct vnode *, unsigned,
183 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
184 void *, int, struct proc *);
185 static void raidinit(struct raid_softc *);
186 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
187 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
188
189 static int raid_match(device_t, cfdata_t, void *);
190 static void raid_attach(device_t, device_t, void *);
191 static int raid_detach(device_t, int);
192
193 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
194 daddr_t, daddr_t);
195 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
196 daddr_t, daddr_t, int);
197
198 static int raidwrite_component_label(unsigned,
199 dev_t, struct vnode *, RF_ComponentLabel_t *);
200 static int raidread_component_label(unsigned,
201 dev_t, struct vnode *, RF_ComponentLabel_t *);
202
203 static int raid_diskstart(device_t, struct buf *bp);
204 static int raid_dumpblocks(device_t, void *, daddr_t, int);
205 static int raid_lastclose(device_t);
206
207 static dev_type_open(raidopen);
208 static dev_type_close(raidclose);
209 static dev_type_read(raidread);
210 static dev_type_write(raidwrite);
211 static dev_type_ioctl(raidioctl);
212 static dev_type_strategy(raidstrategy);
213 static dev_type_dump(raiddump);
214 static dev_type_size(raidsize);
215
216 const struct bdevsw raid_bdevsw = {
217 .d_open = raidopen,
218 .d_close = raidclose,
219 .d_strategy = raidstrategy,
220 .d_ioctl = raidioctl,
221 .d_dump = raiddump,
222 .d_psize = raidsize,
223 .d_discard = nodiscard,
224 .d_flag = D_DISK
225 };
226
227 const struct cdevsw raid_cdevsw = {
228 .d_open = raidopen,
229 .d_close = raidclose,
230 .d_read = raidread,
231 .d_write = raidwrite,
232 .d_ioctl = raidioctl,
233 .d_stop = nostop,
234 .d_tty = notty,
235 .d_poll = nopoll,
236 .d_mmap = nommap,
237 .d_kqfilter = nokqfilter,
238 .d_discard = nodiscard,
239 .d_flag = D_DISK
240 };
241
242 static struct dkdriver rf_dkdriver = {
243 .d_open = raidopen,
244 .d_close = raidclose,
245 .d_strategy = raidstrategy,
246 .d_diskstart = raid_diskstart,
247 .d_dumpblocks = raid_dumpblocks,
248 .d_lastclose = raid_lastclose,
249 .d_minphys = minphys
250 };
251
252 #define raidunit(x) DISKUNIT(x)
253 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
254
255 extern struct cfdriver raid_cd;
256 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
257 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
258 DVF_DETACH_SHUTDOWN);
259
260 /* Internal representation of a rf_recon_req */
261 struct rf_recon_req_internal {
262 RF_RowCol_t col;
263 RF_ReconReqFlags_t flags;
264 void *raidPtr;
265 };
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even it if is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req_internal *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 sc->sc_unit = unit;
342 cv_init(&sc->sc_cv, "raidunit");
343 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
344 return sc;
345 }
346
347 static void
348 raiddestroy(struct raid_softc *sc) {
349 cv_destroy(&sc->sc_cv);
350 mutex_destroy(&sc->sc_mutex);
351 kmem_free(sc, sizeof(*sc));
352 }
353
354 static struct raid_softc *
355 raidget(int unit, bool create) {
356 struct raid_softc *sc;
357 if (unit < 0) {
358 #ifdef DIAGNOSTIC
359 panic("%s: unit %d!", __func__, unit);
360 #endif
361 return NULL;
362 }
363 mutex_enter(&raid_lock);
364 LIST_FOREACH(sc, &raids, sc_link) {
365 if (sc->sc_unit == unit) {
366 mutex_exit(&raid_lock);
367 return sc;
368 }
369 }
370 mutex_exit(&raid_lock);
371 if (!create)
372 return NULL;
373 if ((sc = raidcreate(unit)) == NULL)
374 return NULL;
375 mutex_enter(&raid_lock);
376 LIST_INSERT_HEAD(&raids, sc, sc_link);
377 mutex_exit(&raid_lock);
378 return sc;
379 }
380
381 static void
382 raidput(struct raid_softc *sc) {
383 mutex_enter(&raid_lock);
384 LIST_REMOVE(sc, sc_link);
385 mutex_exit(&raid_lock);
386 raiddestroy(sc);
387 }
388
void
raidattach(int num)
{

	/*
	 * Intentionally empty: device attachment and the associated
	 * initialization are performed during module initialization,
	 * not from this legacy pseudo-device attach hook.
	 */
}
398
/*
 * Auto-configuration entry point: locate all RAID components on the
 * system, sort them into configuration sets, and hand the sets to
 * rf_buildroothack() for evaluation and configuration.
 *
 * Runs at most once (guarded by raidautoconfigdone) and only when
 * autoconfiguration is enabled.  Returns 1 if a scan was performed,
 * 0 if it was skipped.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
436
437 int
438 rf_inited(const struct raid_softc *rs) {
439 return (rs->sc_flags & RAIDF_INITED) != 0;
440 }
441
442 RF_Raid_t *
443 rf_get_raid(struct raid_softc *rs) {
444 return &rs->sc_r;
445 }
446
447 int
448 rf_get_unit(const struct raid_softc *rs) {
449 return rs->sc_unit;
450 }
451
/*
 * Return 1 if the RAID set `r' contains the boot device `bdv' as one of
 * its components, 0 otherwise.  Wedge components ("dk*") are matched via
 * their parent device name.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* Skip the leading "/dev/" of the stored component path. */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* Wedge: compare against the parent disk's name. */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		/*
		 * NOTE(review): this is a prefix comparison of length
		 * strlen(bootname); a boot device "wd1" would also match a
		 * component on "wd10".  Presumably the trailing partition
		 * letter makes this safe in practice — verify.
		 */
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
482
483 void
484 rf_buildroothack(RF_ConfigSet_t *config_sets)
485 {
486 RF_ConfigSet_t *cset;
487 RF_ConfigSet_t *next_cset;
488 int num_root;
489 struct raid_softc *sc, *rsc;
490 struct dk_softc *dksc;
491
492 sc = rsc = NULL;
493 num_root = 0;
494 cset = config_sets;
495 while (cset != NULL) {
496 next_cset = cset->next;
497 if (rf_have_enough_components(cset) &&
498 cset->ac->clabel->autoconfigure == 1) {
499 sc = rf_auto_config_set(cset);
500 if (sc != NULL) {
501 aprint_debug("raid%d: configured ok, rootable %d\n",
502 sc->sc_unit, cset->rootable);
503 if (cset->rootable) {
504 rsc = sc;
505 num_root++;
506 }
507 } else {
508 /* The autoconfig didn't work :( */
509 aprint_debug("Autoconfig failed\n");
510 rf_release_all_vps(cset);
511 }
512 } else {
513 /* we're not autoconfiguring this set...
514 release the associated resources */
515 rf_release_all_vps(cset);
516 }
517 /* cleanup */
518 rf_cleanup_config_set(cset);
519 cset = next_cset;
520 }
521 dksc = &rsc->sc_dksc;
522
523 /* if the user has specified what the root device should be
524 then we don't touch booted_device or boothowto... */
525
526 if (rootspec != NULL) {
527 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
528 return;
529 }
530
531 /* we found something bootable... */
532
533 /*
534 * XXX: The following code assumes that the root raid
535 * is the first ('a') partition. This is about the best
536 * we can do with a BSD disklabel, but we might be able
537 * to do better with a GPT label, by setting a specified
538 * attribute to indicate the root partition. We can then
539 * stash the partition number in the r->root_partition
540 * high bits (the bottom 2 bits are already used). For
541 * now we just set booted_partition to 0 when we override
542 * root.
543 */
544 if (num_root == 1) {
545 device_t candidate_root;
546 if (dksc->sc_dkdev.dk_nwedges != 0) {
547 char cname[sizeof(cset->ac->devname)];
548 /* XXX: assume partition 'a' first */
549 snprintf(cname, sizeof(cname), "%s%c",
550 device_xname(dksc->sc_dev), 'a');
551 candidate_root = dkwedge_find_by_wname(cname);
552 DPRINTF("%s: candidate wedge root=%s\n", __func__,
553 cname);
554 if (candidate_root == NULL) {
555 /*
556 * If that is not found, because we don't use
557 * disklabel, return the first dk child
558 * XXX: we can skip the 'a' check above
559 * and always do this...
560 */
561 size_t i = 0;
562 candidate_root = dkwedge_find_by_parent(
563 device_xname(dksc->sc_dev), &i);
564 }
565 DPRINTF("%s: candidate wedge root=%p\n", __func__,
566 candidate_root);
567 } else
568 candidate_root = dksc->sc_dev;
569 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
570 DPRINTF("%s: booted_device=%p root_partition=%d "
571 "contains_boot=%d",
572 __func__, booted_device, rsc->sc_r.root_partition,
573 rf_containsboot(&rsc->sc_r, booted_device));
574 /* XXX the check for booted_device == NULL can probably be
575 * dropped, now that rf_containsboot handles that case.
576 */
577 if (booted_device == NULL ||
578 rsc->sc_r.root_partition == 1 ||
579 rf_containsboot(&rsc->sc_r, booted_device)) {
580 booted_device = candidate_root;
581 booted_method = "raidframe/single";
582 booted_partition = 0; /* XXX assume 'a' */
583 }
584 } else if (num_root > 1) {
585 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
586 booted_device);
587
588 /*
589 * Maybe the MD code can help. If it cannot, then
590 * setroot() will discover that we have no
591 * booted_device and will ask the user if nothing was
592 * hardwired in the kernel config file
593 */
594 if (booted_device == NULL)
595 return;
596
597 num_root = 0;
598 mutex_enter(&raid_lock);
599 LIST_FOREACH(sc, &raids, sc_link) {
600 RF_Raid_t *r = &sc->sc_r;
601 if (r->valid == 0)
602 continue;
603
604 if (r->root_partition == 0)
605 continue;
606
607 if (rf_containsboot(r, booted_device)) {
608 num_root++;
609 rsc = sc;
610 dksc = &rsc->sc_dksc;
611 }
612 }
613 mutex_exit(&raid_lock);
614
615 if (num_root == 1) {
616 booted_device = dksc->sc_dev;
617 booted_method = "raidframe/multi";
618 booted_partition = 0; /* XXX assume 'a' */
619 } else {
620 /* we can't guess.. require the user to answer... */
621 boothowto |= RB_ASKNAME;
622 }
623 }
624 }
625
626 static int
627 raidsize(dev_t dev)
628 {
629 struct raid_softc *rs;
630 struct dk_softc *dksc;
631 unsigned int unit;
632
633 unit = raidunit(dev);
634 if ((rs = raidget(unit, false)) == NULL)
635 return -1;
636 dksc = &rs->sc_dksc;
637
638 if ((rs->sc_flags & RAIDF_INITED) == 0)
639 return -1;
640
641 return dk_size(dksc, dev);
642 }
643
644 static int
645 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
646 {
647 unsigned int unit;
648 struct raid_softc *rs;
649 struct dk_softc *dksc;
650
651 unit = raidunit(dev);
652 if ((rs = raidget(unit, false)) == NULL)
653 return ENXIO;
654 dksc = &rs->sc_dksc;
655
656 if ((rs->sc_flags & RAIDF_INITED) == 0)
657 return ENODEV;
658
659 /*
660 Note that blkno is relative to this particular partition.
661 By adding adding RF_PROTECTED_SECTORS, we get a value that
662 is relative to the partition used for the underlying component.
663 */
664 blkno += RF_PROTECTED_SECTORS;
665
666 return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
667 }
668
/*
 * Write `nblk' blocks of crash-dump data at `blkno' directly to one live
 * component of a RAID 1 set, bypassing the RAIDframe I/O path (which is
 * not usable during a crash dump).  Only RAID 1 (one data + one parity
 * column) is supported.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Hand the dump straight to the chosen component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
774
775 /* ARGSUSED */
/*
 * Open entry point (both bdevsw and cdevsw).  Creates the softc on first
 * reference, refuses units that are shutting down, marks all components
 * dirty on the first open of a configured set, and defers the rest of
 * the work to dk_open().
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int     error = 0;
	int     part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* A unit being shut down may not be reopened. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
824
825 static int
826 raid_lastclose(device_t self)
827 {
828 struct raid_softc *rs = raidsoftc(self);
829
830 /* Last one... device is not unconfigured yet.
831 Device shutdown has taken care of setting the
832 clean bits if RAIDF_INITED is not set
833 mark things as clean... */
834
835 rf_update_component_labels(&rs->sc_r,
836 RF_FINAL_COMPONENT_UPDATE);
837
838 /* pass to unlocked code */
839 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
840 rs->sc_flags |= RAIDF_DETACH;
841
842 return 0;
843 }
844
845 /* ARGSUSED */
/*
 * Close entry point (both bdevsw and cdevsw).  Delegates to dk_close()
 * and then — outside the unit lock — performs a deferred detach (set up
 * by raid_lastclose) or frees an unconfigured softc on shutdown.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int     error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* RAIDF_DETACH is set by raid_lastclose on the final close. */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* Both actions below must happen after dropping the unit lock. */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
884
/*
 * Kick the RAIDframe I/O thread: signal iodone_cv under iodone_lock so
 * pending work queued for this array gets picked up.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
892
893 static void
894 raidstrategy(struct buf *bp)
895 {
896 unsigned int unit;
897 struct raid_softc *rs;
898 struct dk_softc *dksc;
899 RF_Raid_t *raidPtr;
900
901 unit = raidunit(bp->b_dev);
902 if ((rs = raidget(unit, false)) == NULL) {
903 bp->b_error = ENXIO;
904 goto fail;
905 }
906 if ((rs->sc_flags & RAIDF_INITED) == 0) {
907 bp->b_error = ENXIO;
908 goto fail;
909 }
910 dksc = &rs->sc_dksc;
911 raidPtr = &rs->sc_r;
912
913 /* Queue IO only */
914 if (dk_strategy_defer(dksc, bp))
915 goto done;
916
917 /* schedule the IO to happen at the next convenient time */
918 raid_wakeup(raidPtr);
919
920 done:
921 return;
922
923 fail:
924 bp->b_resid = bp->b_bcount;
925 biodone(bp);
926 }
927
928 static int
929 raid_diskstart(device_t dev, struct buf *bp)
930 {
931 struct raid_softc *rs = raidsoftc(dev);
932 RF_Raid_t *raidPtr;
933
934 raidPtr = &rs->sc_r;
935 if (!raidPtr->valid) {
936 db1_printf(("raid is not valid..\n"));
937 return ENODEV;
938 }
939
940 /* XXX */
941 bp->b_resid = 0;
942
943 return raiddoaccess(raidPtr, bp);
944 }
945
946 void
947 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
948 {
949 struct raid_softc *rs;
950 struct dk_softc *dksc;
951
952 rs = raidPtr->softc;
953 dksc = &rs->sc_dksc;
954
955 dk_done(dksc, bp);
956
957 rf_lock_mutex2(raidPtr->mutex);
958 raidPtr->openings++;
959 rf_unlock_mutex2(raidPtr->mutex);
960
961 /* schedule more IO */
962 raid_wakeup(raidPtr);
963 }
964
965 /* ARGSUSED */
966 static int
967 raidread(dev_t dev, struct uio *uio, int flags)
968 {
969 int unit = raidunit(dev);
970 struct raid_softc *rs;
971
972 if ((rs = raidget(unit, false)) == NULL)
973 return ENXIO;
974
975 if ((rs->sc_flags & RAIDF_INITED) == 0)
976 return (ENXIO);
977
978 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
979
980 }
981
982 /* ARGSUSED */
983 static int
984 raidwrite(dev_t dev, struct uio *uio, int flags)
985 {
986 int unit = raidunit(dev);
987 struct raid_softc *rs;
988
989 if ((rs = raidget(unit, false)) == NULL)
990 return ENXIO;
991
992 if ((rs->sc_flags & RAIDF_INITED) == 0)
993 return (ENXIO);
994
995 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
996
997 }
998
/*
 * Tear down a configured RAID set: shut down RAIDframe, drain and free
 * the buffer queue, and detach/destroy the disk.  The caller must hold
 * the unit lock.  Returns EBUSY while the device is open or a
 * reconstruction/parity-rewrite/copyback is in progress.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* Commit to the detach: clear the pending-shutdown marker first. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1036
1037 static bool
1038 rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
1039 {
1040 switch (cmd) {
1041 case RAIDFRAME_ADD_HOT_SPARE:
1042 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1043 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1044 case RAIDFRAME_CHECK_PARITY:
1045 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1046 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1047 case RAIDFRAME_CHECK_RECON_STATUS:
1048 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1049 case RAIDFRAME_COPYBACK:
1050 case RAIDFRAME_DELETE_COMPONENT:
1051 case RAIDFRAME_FAIL_DISK:
1052 case RAIDFRAME_GET_ACCTOTALS:
1053 case RAIDFRAME_GET_COMPONENT_LABEL:
1054 case RAIDFRAME_GET_INFO:
1055 case RAIDFRAME_GET_SIZE:
1056 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1057 case RAIDFRAME_INIT_LABELS:
1058 case RAIDFRAME_KEEP_ACCTOTALS:
1059 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1060 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1061 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1062 case RAIDFRAME_PARITYMAP_STATUS:
1063 case RAIDFRAME_REBUILD_IN_PLACE:
1064 case RAIDFRAME_REMOVE_HOT_SPARE:
1065 case RAIDFRAME_RESET_ACCTOTALS:
1066 case RAIDFRAME_REWRITEPARITY:
1067 case RAIDFRAME_SET_AUTOCONFIG:
1068 case RAIDFRAME_SET_COMPONENT_LABEL:
1069 case RAIDFRAME_SET_LAST_UNIT:
1070 case RAIDFRAME_SET_ROOT:
1071 case RAIDFRAME_SHUTDOWN:
1072 return (rs->sc_flags & RAIDF_INITED) == 0;
1073 }
1074 return false;
1075 }
1076
/*
 * Administratively fail a component and kick off reconstruction in a
 * separate kernel thread.  Refuses the request on RAID 0, for invalid
 * columns, during an active reconstruction, when another component has
 * already failed, or when the target is already spared.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	/* The recon thread owns rrint from here on. */
	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	/* Error exits above arrive here still holding the mutex. */
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1125
1126 static int
1127 rf_copyinspecificbuf(RF_Config_t *k_cfg)
1128 {
1129 /* allocate a buffer for the layout-specific data, and copy it in */
1130 if (k_cfg->layoutSpecificSize == 0)
1131 return 0;
1132
1133 if (k_cfg->layoutSpecificSize > 10000) {
1134 /* sanity check */
1135 return EINVAL;
1136 }
1137
1138 u_char *specific_buf;
1139 specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1140 if (specific_buf == NULL)
1141 return ENOMEM;
1142
1143 int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1144 k_cfg->layoutSpecificSize);
1145 if (retcode) {
1146 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1147 db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1148 return retcode;
1149 }
1150
1151 k_cfg->layoutSpecific = specific_buf;
1152 return 0;
1153 }
1154
1155 static int
1156 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1157 {
1158 RF_Config_t *u_cfg = *((RF_Config_t **) data);
1159
1160 if (rs->sc_r.valid) {
1161 /* There is a valid RAID set running on this unit! */
1162 printf("raid%d: Device already configured!\n", rs->sc_unit);
1163 return EINVAL;
1164 }
1165
1166 /* copy-in the configuration information */
1167 /* data points to a pointer to the configuration structure */
1168 *k_cfg = RF_Malloc(sizeof(**k_cfg));
1169 if (*k_cfg == NULL) {
1170 return ENOMEM;
1171 }
1172 int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1173 if (retcode == 0)
1174 return 0;
1175 RF_Free(*k_cfg, sizeof(RF_Config_t));
1176 db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1177 rs->sc_flags |= RAIDF_SHUTDOWN;
1178 return retcode;
1179 }
1180
1181 int
1182 rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
1183 {
1184 int retcode, i;
1185 RF_Raid_t *raidPtr = &rs->sc_r;
1186
1187 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1188
1189 if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
1190 goto out;
1191
1192 /* should do some kind of sanity check on the configuration.
1193 * Store the sum of all the bytes in the last byte? */
1194
1195 /* Force nul-termination on all strings. */
1196 #define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
1197 for (i = 0; i < RF_MAXCOL; i++) {
1198 ZERO_FINAL(k_cfg->devnames[0][i]);
1199 }
1200 for (i = 0; i < RF_MAXSPARE; i++) {
1201 ZERO_FINAL(k_cfg->spare_names[i]);
1202 }
1203 for (i = 0; i < RF_MAXDBGV; i++) {
1204 ZERO_FINAL(k_cfg->debugVars[i]);
1205 }
1206 #undef ZERO_FINAL
1207
1208 /* Check some basic limits. */
1209 if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
1210 retcode = EINVAL;
1211 goto out;
1212 }
1213 if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
1214 retcode = EINVAL;
1215 goto out;
1216 }
1217
1218 /* configure the system */
1219
1220 /*
1221 * Clear the entire RAID descriptor, just to make sure
1222 * there is no stale data left in the case of a
1223 * reconfiguration
1224 */
1225 memset(raidPtr, 0, sizeof(*raidPtr));
1226 raidPtr->softc = rs;
1227 raidPtr->raidid = rs->sc_unit;
1228
1229 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1230
1231 if (retcode == 0) {
1232 /* allow this many simultaneous IO's to
1233 this RAID device */
1234 raidPtr->openings = RAIDOUTSTANDING;
1235
1236 raidinit(rs);
1237 raid_wakeup(raidPtr);
1238 rf_markalldirty(raidPtr);
1239 }
1240
1241 /* free the buffers. No return code here. */
1242 if (k_cfg->layoutSpecificSize) {
1243 RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
1244 }
1245 out:
1246 RF_Free(k_cfg, sizeof(RF_Config_t));
1247 if (retcode) {
1248 /*
1249 * If configuration failed, set sc_flags so that we
1250 * will detach the device when we close it.
1251 */
1252 rs->sc_flags |= RAIDF_SHUTDOWN;
1253 }
1254 return retcode;
1255 }
1256
#if RF_DISABLED
/*
 * RAIDFRAME_SET_COMPONENT_LABEL handler (currently compiled out).
 *
 * Overwrites the in-core component label for clabel->column with the
 * user-supplied one and flushes it to disk.  Only the column range is
 * validated; the XXX comments below explain why this is kept disabled.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	/* Rows other than 0 are not supported. */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1295
1296 static int
1297 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1298 {
1299 /*
1300 we only want the serial number from
1301 the above. We get all the rest of the information
1302 from the config that was used to create this RAID
1303 set.
1304 */
1305
1306 raidPtr->serial_number = clabel->serial_number;
1307
1308 for (int column = 0; column < raidPtr->numCol; column++) {
1309 RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1310 if (RF_DEAD_DISK(diskPtr->status))
1311 continue;
1312 RF_ComponentLabel_t *ci_label = raidget_component_label(
1313 raidPtr, column);
1314 /* Zeroing this is important. */
1315 memset(ci_label, 0, sizeof(*ci_label));
1316 raid_init_component_label(raidPtr, ci_label);
1317 ci_label->serial_number = raidPtr->serial_number;
1318 ci_label->row = 0; /* we dont' pretend to support more */
1319 rf_component_label_set_partitionsize(ci_label,
1320 diskPtr->partitionSize);
1321 ci_label->column = column;
1322 raidflush_component_label(raidPtr, column);
1323 /* XXXjld what about the spares? */
1324 }
1325
1326 return 0;
1327 }
1328
/*
 * RAIDFRAME_REBUILD_IN_PLACE handler: rebuild the named component in
 * place by spawning a reconstruct-in-place thread.
 *
 * Returns EINVAL on RAID 0 sets, when a reconstruction is already in
 * progress, when the column is out of range, when a different
 * component has failed, or when the target is already reconstructing
 * or spared; ENOMEM if the request cannot be allocated.  Otherwise
 * returns the result of creating the thread.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Work on a local copy; don't trust the caller's buffer. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* Status checks must be atomic w.r.t. other state changes. */
	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		/* Can't rebuild a spared component in place. */
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* NOTE(review): rrint->flags is never set here (unlike in
	 * rf_fail_disk); presumably RF_Malloc returns zeroed memory --
	 * confirm against rf_general.h. */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1396
1397 static int
1398 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1399 {
1400 /*
1401 * This makes no sense on a RAID 0, or if we are not reconstructing
1402 * so tell the user it's done.
1403 */
1404 if (raidPtr->Layout.map->faultsTolerated == 0 ||
1405 raidPtr->status != rf_rs_reconstructing) {
1406 *data = 100;
1407 return 0;
1408 }
1409 if (raidPtr->reconControl->numRUsTotal == 0) {
1410 *data = 0;
1411 return 0;
1412 }
1413 *data = (raidPtr->reconControl->numRUsComplete * 100
1414 / raidPtr->reconControl->numRUsTotal);
1415 return 0;
1416 }
1417
1418 /*
1419 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
1420 * on the component_name[] array.
1421 */
1422 static void
1423 rf_copy_single_component(RF_SingleComponent_t *component, void *data)
1424 {
1425
1426 memcpy(component, data, sizeof *component);
1427 component->component_name[sizeof(component->component_name) - 1] = '\0';
1428 }
1429
1430 static int
1431 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1432 {
1433 int unit = raidunit(dev);
1434 int part, pmask;
1435 struct raid_softc *rs;
1436 struct dk_softc *dksc;
1437 RF_Config_t *k_cfg;
1438 RF_Raid_t *raidPtr;
1439 RF_AccTotals_t *totals;
1440 RF_SingleComponent_t component;
1441 RF_DeviceConfig_t *d_cfg, *ucfgp;
1442 int retcode = 0;
1443 int column;
1444 RF_ComponentLabel_t *clabel;
1445 int d;
1446
1447 if ((rs = raidget(unit, false)) == NULL)
1448 return ENXIO;
1449
1450 dksc = &rs->sc_dksc;
1451 raidPtr = &rs->sc_r;
1452
1453 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1454 (int) DISKPART(dev), (int) unit, cmd));
1455
1456 /* Must be initialized for these... */
1457 if (rf_must_be_initialized(rs, cmd))
1458 return ENXIO;
1459
1460 switch (cmd) {
1461 /* configure the system */
1462 case RAIDFRAME_CONFIGURE:
1463 if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1464 return retcode;
1465 return rf_construct(rs, k_cfg);
1466
1467 /* shutdown the system */
1468 case RAIDFRAME_SHUTDOWN:
1469
1470 part = DISKPART(dev);
1471 pmask = (1 << part);
1472
1473 if ((retcode = raidlock(rs)) != 0)
1474 return retcode;
1475
1476 if (DK_BUSY(dksc, pmask) ||
1477 raidPtr->recon_in_progress != 0 ||
1478 raidPtr->parity_rewrite_in_progress != 0 ||
1479 raidPtr->copyback_in_progress != 0)
1480 retcode = EBUSY;
1481 else {
1482 /* detach and free on close */
1483 rs->sc_flags |= RAIDF_SHUTDOWN;
1484 retcode = 0;
1485 }
1486
1487 raidunlock(rs);
1488
1489 return retcode;
1490 case RAIDFRAME_GET_COMPONENT_LABEL:
1491 return rf_get_component_label(raidPtr, data);
1492
1493 #if RF_DISABLED
1494 case RAIDFRAME_SET_COMPONENT_LABEL:
1495 return rf_set_component_label(raidPtr, data);
1496 #endif
1497
1498 case RAIDFRAME_INIT_LABELS:
1499 return rf_init_component_label(raidPtr, data);
1500
1501 case RAIDFRAME_SET_AUTOCONFIG:
1502 d = rf_set_autoconfig(raidPtr, *(int *) data);
1503 printf("raid%d: New autoconfig value is: %d\n",
1504 raidPtr->raidid, d);
1505 *(int *) data = d;
1506 return retcode;
1507
1508 case RAIDFRAME_SET_ROOT:
1509 d = rf_set_rootpartition(raidPtr, *(int *) data);
1510 printf("raid%d: New rootpartition value is: %d\n",
1511 raidPtr->raidid, d);
1512 *(int *) data = d;
1513 return retcode;
1514
1515 /* initialize all parity */
1516 case RAIDFRAME_REWRITEPARITY:
1517
1518 if (raidPtr->Layout.map->faultsTolerated == 0) {
1519 /* Parity for RAID 0 is trivially correct */
1520 raidPtr->parity_good = RF_RAID_CLEAN;
1521 return 0;
1522 }
1523
1524 if (raidPtr->parity_rewrite_in_progress == 1) {
1525 /* Re-write is already in progress! */
1526 return EINVAL;
1527 }
1528
1529 return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1530 rf_RewriteParityThread, raidPtr,"raid_parity");
1531
1532 case RAIDFRAME_ADD_HOT_SPARE:
1533 rf_copy_single_component(&component, data);
1534 return rf_add_hot_spare(raidPtr, &component);
1535
1536 case RAIDFRAME_REMOVE_HOT_SPARE:
1537 return retcode;
1538
1539 case RAIDFRAME_DELETE_COMPONENT:
1540 rf_copy_single_component(&component, data);
1541 return rf_delete_component(raidPtr, &component);
1542
1543 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1544 rf_copy_single_component(&component, data);
1545 return rf_incorporate_hot_spare(raidPtr, &component);
1546
1547 case RAIDFRAME_REBUILD_IN_PLACE:
1548 return rf_rebuild_in_place(raidPtr, data);
1549
1550 case RAIDFRAME_GET_INFO:
1551 ucfgp = *(RF_DeviceConfig_t **)data;
1552 d_cfg = RF_Malloc(sizeof(*d_cfg));
1553 if (d_cfg == NULL)
1554 return ENOMEM;
1555 retcode = rf_get_info(raidPtr, d_cfg);
1556 if (retcode == 0) {
1557 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1558 }
1559 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1560 return retcode;
1561
1562 case RAIDFRAME_CHECK_PARITY:
1563 *(int *) data = raidPtr->parity_good;
1564 return 0;
1565
1566 case RAIDFRAME_PARITYMAP_STATUS:
1567 if (rf_paritymap_ineligible(raidPtr))
1568 return EINVAL;
1569 rf_paritymap_status(raidPtr->parity_map, data);
1570 return 0;
1571
1572 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1573 if (rf_paritymap_ineligible(raidPtr))
1574 return EINVAL;
1575 if (raidPtr->parity_map == NULL)
1576 return ENOENT; /* ??? */
1577 if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1578 return EINVAL;
1579 return 0;
1580
1581 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1582 if (rf_paritymap_ineligible(raidPtr))
1583 return EINVAL;
1584 *(int *) data = rf_paritymap_get_disable(raidPtr);
1585 return 0;
1586
1587 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1588 if (rf_paritymap_ineligible(raidPtr))
1589 return EINVAL;
1590 rf_paritymap_set_disable(raidPtr, *(int *)data);
1591 /* XXX should errors be passed up? */
1592 return 0;
1593
1594 case RAIDFRAME_RESET_ACCTOTALS:
1595 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1596 return 0;
1597
1598 case RAIDFRAME_GET_ACCTOTALS:
1599 totals = (RF_AccTotals_t *) data;
1600 *totals = raidPtr->acc_totals;
1601 return 0;
1602
1603 case RAIDFRAME_KEEP_ACCTOTALS:
1604 raidPtr->keep_acc_totals = *(int *)data;
1605 return 0;
1606
1607 case RAIDFRAME_GET_SIZE:
1608 *(int *) data = raidPtr->totalSectors;
1609 return 0;
1610
1611 case RAIDFRAME_FAIL_DISK:
1612 return rf_fail_disk(raidPtr, data);
1613
1614 /* invoke a copyback operation after recon on whatever disk
1615 * needs it, if any */
1616 case RAIDFRAME_COPYBACK:
1617
1618 if (raidPtr->Layout.map->faultsTolerated == 0) {
1619 /* This makes no sense on a RAID 0!! */
1620 return EINVAL;
1621 }
1622
1623 if (raidPtr->copyback_in_progress == 1) {
1624 /* Copyback is already in progress! */
1625 return EINVAL;
1626 }
1627
1628 return RF_CREATE_THREAD(raidPtr->copyback_thread,
1629 rf_CopybackThread, raidPtr, "raid_copyback");
1630
1631 /* return the percentage completion of reconstruction */
1632 case RAIDFRAME_CHECK_RECON_STATUS:
1633 return rf_check_recon_status(raidPtr, data);
1634
1635 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1636 rf_check_recon_status_ext(raidPtr, data);
1637 return 0;
1638
1639 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1640 if (raidPtr->Layout.map->faultsTolerated == 0) {
1641 /* This makes no sense on a RAID 0, so tell the
1642 user it's done. */
1643 *(int *) data = 100;
1644 return 0;
1645 }
1646 if (raidPtr->parity_rewrite_in_progress == 1) {
1647 *(int *) data = 100 *
1648 raidPtr->parity_rewrite_stripes_done /
1649 raidPtr->Layout.numStripe;
1650 } else {
1651 *(int *) data = 100;
1652 }
1653 return 0;
1654
1655 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1656 rf_check_parityrewrite_status_ext(raidPtr, data);
1657 return 0;
1658
1659 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1660 if (raidPtr->Layout.map->faultsTolerated == 0) {
1661 /* This makes no sense on a RAID 0 */
1662 *(int *) data = 100;
1663 return 0;
1664 }
1665 if (raidPtr->copyback_in_progress == 1) {
1666 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1667 raidPtr->Layout.numStripe;
1668 } else {
1669 *(int *) data = 100;
1670 }
1671 return 0;
1672
1673 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1674 rf_check_copyback_status_ext(raidPtr, data);
1675 return 0;
1676
1677 case RAIDFRAME_SET_LAST_UNIT:
1678 for (column = 0; column < raidPtr->numCol; column++)
1679 if (raidPtr->Disks[column].status != rf_ds_optimal)
1680 return EBUSY;
1681
1682 for (column = 0; column < raidPtr->numCol; column++) {
1683 clabel = raidget_component_label(raidPtr, column);
1684 clabel->last_unit = *(int *)data;
1685 raidflush_component_label(raidPtr, column);
1686 }
1687 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1688 return 0;
1689
1690 /* the sparetable daemon calls this to wait for the kernel to
1691 * need a spare table. this ioctl does not return until a
1692 * spare table is needed. XXX -- calling mpsleep here in the
1693 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1694 * -- I should either compute the spare table in the kernel,
1695 * or have a different -- XXX XXX -- interface (a different
1696 * character device) for delivering the table -- XXX */
1697 #if RF_DISABLED
1698 case RAIDFRAME_SPARET_WAIT:
1699 rf_lock_mutex2(rf_sparet_wait_mutex);
1700 while (!rf_sparet_wait_queue)
1701 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1702 RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1703 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1704 rf_unlock_mutex2(rf_sparet_wait_mutex);
1705
1706 /* structure assignment */
1707 *((RF_SparetWait_t *) data) = *waitreq;
1708
1709 RF_Free(waitreq, sizeof(*waitreq));
1710 return 0;
1711
1712 /* wakes up a process waiting on SPARET_WAIT and puts an error
1713 * code in it that will cause the dameon to exit */
1714 case RAIDFRAME_ABORT_SPARET_WAIT:
1715 waitreq = RF_Malloc(sizeof(*waitreq));
1716 waitreq->fcol = -1;
1717 rf_lock_mutex2(rf_sparet_wait_mutex);
1718 waitreq->next = rf_sparet_wait_queue;
1719 rf_sparet_wait_queue = waitreq;
1720 rf_broadcast_cond2(rf_sparet_wait_cv);
1721 rf_unlock_mutex2(rf_sparet_wait_mutex);
1722 return 0;
1723
1724 /* used by the spare table daemon to deliver a spare table
1725 * into the kernel */
1726 case RAIDFRAME_SEND_SPARET:
1727
1728 /* install the spare table */
1729 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1730
1731 /* respond to the requestor. the return status of the spare
1732 * table installation is passed in the "fcol" field */
1733 waitred = RF_Malloc(sizeof(*waitreq));
1734 waitreq->fcol = retcode;
1735 rf_lock_mutex2(rf_sparet_wait_mutex);
1736 waitreq->next = rf_sparet_resp_queue;
1737 rf_sparet_resp_queue = waitreq;
1738 rf_broadcast_cond2(rf_sparet_resp_cv);
1739 rf_unlock_mutex2(rf_sparet_wait_mutex);
1740
1741 return retcode;
1742 #endif
1743 default:
1744 /*
1745 * Don't bother trying to load compat modules
1746 * if it is not our ioctl. This is more efficient
1747 * and makes rump tests not depend on compat code
1748 */
1749 if (IOCGROUP(cmd) != 'r')
1750 break;
1751 #ifdef _LP64
1752 if ((l->l_proc->p_flag & PK_32) != 0) {
1753 module_autoload("compat_netbsd32_raid",
1754 MODULE_CLASS_EXEC);
1755 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1756 (rs, cmd, data), enosys(), retcode);
1757 if (retcode != EPASSTHROUGH)
1758 return retcode;
1759 }
1760 #endif
1761 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1762 MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1763 (rs, cmd, data), enosys(), retcode);
1764 if (retcode != EPASSTHROUGH)
1765 return retcode;
1766
1767 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1768 MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1769 (rs, cmd, data), enosys(), retcode);
1770 if (retcode != EPASSTHROUGH)
1771 return retcode;
1772 break; /* fall through to the os-specific code below */
1773
1774 }
1775
1776 if (!raidPtr->valid)
1777 return (EINVAL);
1778
1779 /*
1780 * Add support for "regular" device ioctls here.
1781 */
1782
1783 switch (cmd) {
1784 case DIOCGCACHE:
1785 retcode = rf_get_component_caches(raidPtr, (int *)data);
1786 break;
1787
1788 case DIOCCACHESYNC:
1789 retcode = rf_sync_component_caches(raidPtr);
1790 break;
1791
1792 default:
1793 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1794 break;
1795 }
1796
1797 return (retcode);
1798
1799 }
1800
1801
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, initialize the dk/disk
   layers, and mark the unit as usable. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed: RAIDF_INITED is never set, so the
		 * unit remains unusable. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Look for wedges (sub-partitions) on the new device. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1861
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 *
 * Queues `req' on rf_sparet_wait_queue for the userland daemon, then
 * blocks until a response appears on rf_sparet_resp_queue; the
 * response's fcol field carries the daemon's status, which is
 * returned to the caller.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif
1896
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update and retake it
		 * to decrement the counter -- the unlock/relock here is
		 * deliberate. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* Kick the dk layer's queue; presumably it calls back into
	 * this driver per buffered request -- see raiddoaccess(). */
	dk_start(dksc, NULL);
}
1931
/*
 * Translate one struct buf into a RAIDframe access and submit it
 * asynchronously via rf_DoAccess().
 *
 * Returns EAGAIN when no "openings" are available (caller retries
 * later), ENOSPC when the request runs past the end of the set or is
 * not sector-aligned, otherwise the result of rf_DoAccess().
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* Throttle: each in-flight access consumes one opening. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
	    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb accounts for a trailing partial sector, if any. */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" makes this branch unconditional;
	 * presumably a deliberate leftover since db1_printf is a
	 * debug macro -- confirm before removing. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int) raid_addr, (int) sum, (int) num_blocks,
		    (int) pb, (int) bp->b_resid));
	}
	/* The (sum < x) comparisons also catch wraparound of the
	 * unsigned addition above. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not whole sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening for this access. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
2004
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the double parens suggest this was meant to
		 * be db1_printf((...)); as written it is a plain printf of
		 * the parenthesized string -- behavior is the same either
		 * way for a single argument. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately via the normal callback. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Prepare the buf for the component device; completion
		 * goes through KernelWakeupFunc(). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
		    req->type, queue->raidPtr->raidid,
		    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
		    queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Installed as bp->b_iodone by InitBP(); it records
   trace timing, marks the component failed on I/O error when the set
   can tolerate it, and queues the request for the raidio thread. */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The request was stashed in b_private by the dispatcher. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2149
2150
2151 /*
2152 * initialize a buf structure for doing an I/O in the kernel.
2153 */
2154 static void
2155 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2156 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2157 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2158 struct proc *b_proc)
2159 {
2160 /* bp->b_flags = B_PHYS | rw_flag; */
2161 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2162 bp->b_oflags = 0;
2163 bp->b_cflags = 0;
2164 bp->b_bcount = numSect << logBytesPerSector;
2165 bp->b_bufsize = bp->b_bcount;
2166 bp->b_error = 0;
2167 bp->b_dev = dev;
2168 bp->b_data = bf;
2169 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2170 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2171 if (bp->b_bcount == 0) {
2172 panic("bp->b_bcount is zero in InitBP!!");
2173 }
2174 bp->b_proc = b_proc;
2175 bp->b_iodone = cbFunc;
2176 bp->b_private = cbArg;
2177 }
2178
2179 /*
2180 * Wait interruptibly for an exclusive lock.
2181 *
2182 * XXX
2183 * Several drivers do this; it should be abstracted and made MP-safe.
2184 * (Hmm... where have we seen this warning before :-> GO )
2185 */
2186 static int
2187 raidlock(struct raid_softc *rs)
2188 {
2189 int error;
2190
2191 error = 0;
2192 mutex_enter(&rs->sc_mutex);
2193 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2194 rs->sc_flags |= RAIDF_WANTED;
2195 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2196 if (error != 0)
2197 goto done;
2198 }
2199 rs->sc_flags |= RAIDF_LOCKED;
2200 done:
2201 mutex_exit(&rs->sc_mutex);
2202 return (error);
2203 }
2204 /*
2205 * Unlock and wake up any waiters.
2206 */
2207 static void
2208 raidunlock(struct raid_softc *rs)
2209 {
2210
2211 mutex_enter(&rs->sc_mutex);
2212 rs->sc_flags &= ~RAIDF_LOCKED;
2213 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2214 rs->sc_flags &= ~RAIDF_WANTED;
2215 cv_broadcast(&rs->sc_cv);
2216 }
2217 mutex_exit(&rs->sc_mutex);
2218 }
2219
2220
2221 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2222 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2223 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2224
2225 static daddr_t
2226 rf_component_info_offset(void)
2227 {
2228
2229 return RF_COMPONENT_INFO_OFFSET;
2230 }
2231
2232 static daddr_t
2233 rf_component_info_size(unsigned secsize)
2234 {
2235 daddr_t info_size;
2236
2237 KASSERT(secsize);
2238 if (secsize > RF_COMPONENT_INFO_SIZE)
2239 info_size = secsize;
2240 else
2241 info_size = RF_COMPONENT_INFO_SIZE;
2242
2243 return info_size;
2244 }
2245
2246 static daddr_t
2247 rf_parity_map_offset(RF_Raid_t *raidPtr)
2248 {
2249 daddr_t map_offset;
2250
2251 KASSERT(raidPtr->bytesPerSector);
2252 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2253 map_offset = raidPtr->bytesPerSector;
2254 else
2255 map_offset = RF_COMPONENT_INFO_SIZE;
2256 map_offset += rf_component_info_offset();
2257
2258 return map_offset;
2259 }
2260
2261 static daddr_t
2262 rf_parity_map_size(RF_Raid_t *raidPtr)
2263 {
2264 daddr_t map_size;
2265
2266 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2267 map_size = raidPtr->bytesPerSector;
2268 else
2269 map_size = RF_PARITY_MAP_SIZE;
2270
2271 return map_size;
2272 }
2273
2274 int
2275 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2276 {
2277 RF_ComponentLabel_t *clabel;
2278
2279 clabel = raidget_component_label(raidPtr, col);
2280 clabel->clean = RF_RAID_CLEAN;
2281 raidflush_component_label(raidPtr, col);
2282 return(0);
2283 }
2284
2285
2286 int
2287 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2288 {
2289 RF_ComponentLabel_t *clabel;
2290
2291 clabel = raidget_component_label(raidPtr, col);
2292 clabel->clean = RF_RAID_DIRTY;
2293 raidflush_component_label(raidPtr, col);
2294 return(0);
2295 }
2296
2297 int
2298 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2299 {
2300 KASSERT(raidPtr->bytesPerSector);
2301 return raidread_component_label(raidPtr->bytesPerSector,
2302 raidPtr->Disks[col].dev,
2303 raidPtr->raid_cinfo[col].ci_vp,
2304 &raidPtr->raid_cinfo[col].ci_label);
2305 }
2306
2307 RF_ComponentLabel_t *
2308 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2309 {
2310 return &raidPtr->raid_cinfo[col].ci_label;
2311 }
2312
2313 int
2314 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2315 {
2316 RF_ComponentLabel_t *label;
2317
2318 label = &raidPtr->raid_cinfo[col].ci_label;
2319 label->mod_counter = raidPtr->mod_counter;
2320 #ifndef RF_NO_PARITY_MAP
2321 label->parity_map_modcount = label->mod_counter;
2322 #endif
2323 return raidwrite_component_label(raidPtr->bytesPerSector,
2324 raidPtr->Disks[col].dev,
2325 raidPtr->raid_cinfo[col].ci_vp, label);
2326 }
2327
2328
2329 static int
2330 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2331 RF_ComponentLabel_t *clabel)
2332 {
2333 return raidread_component_area(dev, b_vp, clabel,
2334 sizeof(RF_ComponentLabel_t),
2335 rf_component_info_offset(),
2336 rf_component_info_size(secsize));
2337 }
2338
2339 /* ARGSUSED */
2340 static int
2341 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2342 size_t msize, daddr_t offset, daddr_t dsize)
2343 {
2344 struct buf *bp;
2345 int error;
2346
2347 /* XXX should probably ensure that we don't try to do this if
2348 someone has changed rf_protected_sectors. */
2349
2350 if (b_vp == NULL) {
2351 /* For whatever reason, this component is not valid.
2352 Don't try to read a component label from it. */
2353 return(EINVAL);
2354 }
2355
2356 /* get a block of the appropriate size... */
2357 bp = geteblk((int)dsize);
2358 bp->b_dev = dev;
2359
2360 /* get our ducks in a row for the read */
2361 bp->b_blkno = offset / DEV_BSIZE;
2362 bp->b_bcount = dsize;
2363 bp->b_flags |= B_READ;
2364 bp->b_resid = dsize;
2365
2366 bdev_strategy(bp);
2367 error = biowait(bp);
2368
2369 if (!error) {
2370 memcpy(data, bp->b_data, msize);
2371 }
2372
2373 brelse(bp, 0);
2374 return(error);
2375 }
2376
2377
2378 static int
2379 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2380 RF_ComponentLabel_t *clabel)
2381 {
2382 return raidwrite_component_area(dev, b_vp, clabel,
2383 sizeof(RF_ComponentLabel_t),
2384 rf_component_info_offset(),
2385 rf_component_info_size(secsize), 0);
2386 }
2387
/* ARGSUSED */
/*
 * Write msize bytes from 'data' into the dsize-byte area at 'offset'
 * on the component device; the remainder of the area is zero-filled.
 * If asyncp is nonzero the write is issued B_ASYNC and this returns 0
 * immediately without waiting for or checking the result.
 * NOTE(review): the async path never calls biowait()/brelse() here —
 * presumably the B_ASYNC completion path releases the buffer; confirm.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* zero-pad the area, then copy in the payload */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2422
2423 void
2424 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2425 {
2426 int c;
2427
2428 for (c = 0; c < raidPtr->numCol; c++) {
2429 /* Skip dead disks. */
2430 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2431 continue;
2432 /* XXXjld: what if an error occurs here? */
2433 raidwrite_component_area(raidPtr->Disks[c].dev,
2434 raidPtr->raid_cinfo[c].ci_vp, map,
2435 RF_PARITYMAP_NBYTE,
2436 rf_parity_map_offset(raidPtr),
2437 rf_parity_map_size(raidPtr), 0);
2438 }
2439 }
2440
2441 void
2442 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2443 {
2444 struct rf_paritymap_ondisk tmp;
2445 int c,first;
2446
2447 first=1;
2448 for (c = 0; c < raidPtr->numCol; c++) {
2449 /* Skip dead disks. */
2450 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2451 continue;
2452 raidread_component_area(raidPtr->Disks[c].dev,
2453 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2454 RF_PARITYMAP_NBYTE,
2455 rf_parity_map_offset(raidPtr),
2456 rf_parity_map_size(raidPtr));
2457 if (first) {
2458 memcpy(map, &tmp, sizeof(*map));
2459 first = 0;
2460 } else {
2461 rf_paritymap_merge(map, &tmp);
2462 }
2463 }
2464 }
2465
/*
 * Bump the set's modification counter and mark every live component
 * (and every in-use spare) dirty on disk, so an unclean shutdown can
 * be detected at the next configuration.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol keeps its previous value (-1 initially) —
			   presumably cannot happen for rf_ds_used_spare;
			   confirm. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2525
2526
/*
 * Refresh the on-disk labels of all optimal components and all in-use
 * spares: bump mod_counter, record status and the unit number, and —
 * when this is the final update and parity is known good — set the
 * clean bit as well.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): scol stays -1 if no column points at
			   this spare — presumably impossible for
			   rf_ds_used_spare; confirm. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2604
2605 void
2606 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2607 {
2608
2609 if (vp != NULL) {
2610 if (auto_configured == 1) {
2611 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2612 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2613 vput(vp);
2614
2615 } else {
2616 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2617 }
2618 }
2619 }
2620
2621
2622 void
2623 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2624 {
2625 int r,c;
2626 struct vnode *vp;
2627 int acd;
2628
2629
2630 /* We take this opportunity to close the vnodes like we should.. */
2631
2632 for (c = 0; c < raidPtr->numCol; c++) {
2633 vp = raidPtr->raid_cinfo[c].ci_vp;
2634 acd = raidPtr->Disks[c].auto_configured;
2635 rf_close_component(raidPtr, vp, acd);
2636 raidPtr->raid_cinfo[c].ci_vp = NULL;
2637 raidPtr->Disks[c].auto_configured = 0;
2638 }
2639
2640 for (r = 0; r < raidPtr->numSpare; r++) {
2641 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2642 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2643 rf_close_component(raidPtr, vp, acd);
2644 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2645 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2646 }
2647 }
2648
2649
2650 void
2651 rf_ReconThread(struct rf_recon_req_internal *req)
2652 {
2653 int s;
2654 RF_Raid_t *raidPtr;
2655
2656 s = splbio();
2657 raidPtr = (RF_Raid_t *) req->raidPtr;
2658 raidPtr->recon_in_progress = 1;
2659
2660 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2661 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2662
2663 RF_Free(req, sizeof(*req));
2664
2665 raidPtr->recon_in_progress = 0;
2666 splx(s);
2667
2668 /* That's all... */
2669 kthread_exit(0); /* does not return */
2670 }
2671
2672 void
2673 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2674 {
2675 int retcode;
2676 int s;
2677
2678 raidPtr->parity_rewrite_stripes_done = 0;
2679 raidPtr->parity_rewrite_in_progress = 1;
2680 s = splbio();
2681 retcode = rf_RewriteParity(raidPtr);
2682 splx(s);
2683 if (retcode) {
2684 printf("raid%d: Error re-writing parity (%d)!\n",
2685 raidPtr->raidid, retcode);
2686 } else {
2687 /* set the clean bit! If we shutdown correctly,
2688 the clean bit on each component label will get
2689 set */
2690 raidPtr->parity_good = RF_RAID_CLEAN;
2691 }
2692 raidPtr->parity_rewrite_in_progress = 0;
2693
2694 /* Anyone waiting for us to stop? If so, inform them... */
2695 if (raidPtr->waitShutdown) {
2696 rf_lock_mutex2(raidPtr->rad_lock);
2697 cv_broadcast(&raidPtr->parity_rewrite_cv);
2698 rf_unlock_mutex2(raidPtr->rad_lock);
2699 }
2700
2701 /* That's all... */
2702 kthread_exit(0); /* does not return */
2703 }
2704
2705
/*
 * Kernel-thread body: copy reconstructed data back to a replaced
 * component, tracking progress via copyback_in_progress, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2720
2721
/*
 * Kernel-thread body: reconstruct the given column in place, free the
 * request, and exit the thread.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2739
/*
 * Read the component label from (dev, vp).  If it looks like a valid
 * RAIDframe component, prepend a new RF_AutoConfig_t (which takes
 * ownership of the open vnode and the label) to ac_list and return the
 * new list head.  Otherwise the label is freed and the vnode closed.
 * On memory exhaustion the entire ac_list is torn down and NULL is
 * returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free every entry collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label unreadable or rejected; drop the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2797
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return the list of candidates found.  The scan runs twice: once
 * over wedges (dk) and once over everything else, so a wedge covering
 * a whole disk is preferred over that disk's raw partition.  For
 * non-wedge disks, each FS_RAID-typed disklabel partition is probed;
 * if none is found, the raw partition is probed as a fallback.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* wedges pass: probe only RAIDframe-typed wedges */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes over the vnode */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3004
3005
3006 int
3007 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3008 {
3009
3010 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3011 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3012 ((clabel->clean == RF_RAID_CLEAN) ||
3013 (clabel->clean == RF_RAID_DIRTY)) &&
3014 clabel->row >=0 &&
3015 clabel->column >= 0 &&
3016 clabel->num_rows > 0 &&
3017 clabel->num_columns > 0 &&
3018 clabel->row < clabel->num_rows &&
3019 clabel->column < clabel->num_columns &&
3020 clabel->blockSize > 0 &&
3021 /*
3022 * numBlocksHi may contain garbage, but it is ok since
3023 * the type is unsigned. If it is really garbage,
3024 * rf_fix_old_label_size() will fix it.
3025 */
3026 rf_component_label_numblocks(clabel) > 0) {
3027 /*
3028 * label looks reasonable enough...
3029 * let's make sure it has no old garbage.
3030 */
3031 if (numsecs)
3032 rf_fix_old_label_size(clabel, numsecs);
3033 return(1);
3034 }
3035 return(0);
3036 }
3037
3038
3039 /*
3040 * For reasons yet unknown, some old component labels have garbage in
3041 * the newer numBlocksHi region, and this causes lossage. Since those
3042 * disks will also have numsecs set to less than 32 bits of sectors,
3043 * we can determine when this corruption has occurred, and fix it.
3044 *
3045 * The exact same problem, with the same unknown reason, happens to
3046 * the partitionSizeHi member as well.
3047 */
3048 static void
3049 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3050 {
3051
3052 if (numsecs < ((uint64_t)1 << 32)) {
3053 if (clabel->numBlocksHi) {
3054 printf("WARNING: total sectors < 32 bits, yet "
3055 "numBlocksHi set\n"
3056 "WARNING: resetting numBlocksHi to zero.\n");
3057 clabel->numBlocksHi = 0;
3058 }
3059
3060 if (clabel->partitionSizeHi) {
3061 printf("WARNING: total sectors < 32 bits, yet "
3062 "partitionSizeHi set\n"
3063 "WARNING: resetting partitionSizeHi to zero.\n");
3064 clabel->partitionSizeHi = 0;
3065 }
3066 }
3067 }
3068
3069
#ifdef DEBUG
/* Pretty-print the contents of a component label to the console. */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3103
3104 RF_ConfigSet_t *
3105 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3106 {
3107 RF_AutoConfig_t *ac;
3108 RF_ConfigSet_t *config_sets;
3109 RF_ConfigSet_t *cset;
3110 RF_AutoConfig_t *ac_next;
3111
3112
3113 config_sets = NULL;
3114
3115 /* Go through the AutoConfig list, and figure out which components
3116 belong to what sets. */
3117 ac = ac_list;
3118 while(ac!=NULL) {
3119 /* we're going to putz with ac->next, so save it here
3120 for use at the end of the loop */
3121 ac_next = ac->next;
3122
3123 if (config_sets == NULL) {
3124 /* will need at least this one... */
3125 config_sets = (RF_ConfigSet_t *)
3126 malloc(sizeof(RF_ConfigSet_t),
3127 M_RAIDFRAME, M_NOWAIT);
3128 if (config_sets == NULL) {
3129 panic("rf_create_auto_sets: No memory!");
3130 }
3131 /* this one is easy :) */
3132 config_sets->ac = ac;
3133 config_sets->next = NULL;
3134 config_sets->rootable = 0;
3135 ac->next = NULL;
3136 } else {
3137 /* which set does this component fit into? */
3138 cset = config_sets;
3139 while(cset!=NULL) {
3140 if (rf_does_it_fit(cset, ac)) {
3141 /* looks like it matches... */
3142 ac->next = cset->ac;
3143 cset->ac = ac;
3144 break;
3145 }
3146 cset = cset->next;
3147 }
3148 if (cset==NULL) {
3149 /* didn't find a match above... new set..*/
3150 cset = (RF_ConfigSet_t *)
3151 malloc(sizeof(RF_ConfigSet_t),
3152 M_RAIDFRAME, M_NOWAIT);
3153 if (cset == NULL) {
3154 panic("rf_create_auto_sets: No memory!");
3155 }
3156 cset->ac = ac;
3157 ac->next = NULL;
3158 cset->next = config_sets;
3159 cset->rootable = 0;
3160 config_sets = cset;
3161 }
3162 }
3163 ac = ac_next;
3164 }
3165
3166
3167 return(config_sets);
3168 }
3169
3170 static int
3171 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3172 {
3173 RF_ComponentLabel_t *clabel1, *clabel2;
3174
3175 /* If this one matches the *first* one in the set, that's good
3176 enough, since the other members of the set would have been
3177 through here too... */
3178 /* note that we are not checking partitionSize here..
3179
3180 Note that we are also not checking the mod_counters here.
3181 If everything else matches except the mod_counter, that's
3182 good enough for this test. We will deal with the mod_counters
3183 a little later in the autoconfiguration process.
3184
3185 (clabel1->mod_counter == clabel2->mod_counter) &&
3186
3187 The reason we don't check for this is that failed disks
3188 will have lower modification counts. If those disks are
3189 not added to the set they used to belong to, then they will
3190 form their own set, which may result in 2 different sets,
3191 for example, competing to be configured at raid0, and
3192 perhaps competing to be the root filesystem set. If the
3193 wrong ones get configured, or both attempt to become /,
3194 weird behaviour and or serious lossage will occur. Thus we
3195 need to bring them into the fold here, and kick them out at
3196 a later point.
3197
3198 */
3199
3200 clabel1 = cset->ac->clabel;
3201 clabel2 = ac->clabel;
3202 if ((clabel1->version == clabel2->version) &&
3203 (clabel1->serial_number == clabel2->serial_number) &&
3204 (clabel1->num_rows == clabel2->num_rows) &&
3205 (clabel1->num_columns == clabel2->num_columns) &&
3206 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3207 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3208 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3209 (clabel1->parityConfig == clabel2->parityConfig) &&
3210 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3211 (clabel1->blockSize == clabel2->blockSize) &&
3212 rf_component_label_numblocks(clabel1) ==
3213 rf_component_label_numblocks(clabel2) &&
3214 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3215 (clabel1->root_partition == clabel2->root_partition) &&
3216 (clabel1->last_unit == clabel2->last_unit) &&
3217 (clabel1->config_order == clabel2->config_order)) {
3218 /* if it get's here, it almost *has* to be a match */
3219 } else {
3220 /* it's not consistent with somebody in the set..
3221 punt */
3222 return(0);
3223 }
3224 /* all was fine.. it must fit... */
3225 return(1);
3226 }
3227
/*
 * Decide whether config set 'cset' has enough live components to be
 * configured.  A component counts as present for a column only if its
 * mod_counter matches the highest mod_counter seen in the set.
 * RAID 1 is special-cased: the set survives unless both members of an
 * even/odd column pair are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the expected mod_counter is the maximum over all members */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (max mod_counter) member for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* non-mirror levels: apply each level's failure tolerance */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3330
/*
 * Build an RF_Config_t for autoconfiguration from a component list:
 * the (already validated) first component's label supplies the shared
 * geometry, and each component's devname is placed in the device table
 * by column.
 * NOTE(review): raidPtr is not referenced here — presumably kept for
 * interface symmetry; confirm before removing.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Fill in the device name table, indexed by column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		    ac->devname);
		ac = ac->next;
	}

	/* 3. Clear the debug variables */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3364
3365 int
3366 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3367 {
3368 RF_ComponentLabel_t *clabel;
3369 int column;
3370 int sparecol;
3371
3372 raidPtr->autoconfigure = new_value;
3373
3374 for(column=0; column<raidPtr->numCol; column++) {
3375 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3376 clabel = raidget_component_label(raidPtr, column);
3377 clabel->autoconfigure = new_value;
3378 raidflush_component_label(raidPtr, column);
3379 }
3380 }
3381 for(column = 0; column < raidPtr->numSpare ; column++) {
3382 sparecol = raidPtr->numCol + column;
3383 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3384 clabel = raidget_component_label(raidPtr, sparecol);
3385 clabel->autoconfigure = new_value;
3386 raidflush_component_label(raidPtr, sparecol);
3387 }
3388 }
3389 return(new_value);
3390 }
3391
3392 int
3393 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3394 {
3395 RF_ComponentLabel_t *clabel;
3396 int column;
3397 int sparecol;
3398
3399 raidPtr->root_partition = new_value;
3400 for(column=0; column<raidPtr->numCol; column++) {
3401 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3402 clabel = raidget_component_label(raidPtr, column);
3403 clabel->root_partition = new_value;
3404 raidflush_component_label(raidPtr, column);
3405 }
3406 }
3407 for(column = 0; column < raidPtr->numSpare ; column++) {
3408 sparecol = raidPtr->numCol + column;
3409 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3410 clabel = raidget_component_label(raidPtr, sparecol);
3411 clabel->root_partition = new_value;
3412 raidflush_component_label(raidPtr, sparecol);
3413 }
3414 }
3415 return(new_value);
3416 }
3417
3418 void
3419 rf_release_all_vps(RF_ConfigSet_t *cset)
3420 {
3421 RF_AutoConfig_t *ac;
3422
3423 ac = cset->ac;
3424 while(ac!=NULL) {
3425 /* Close the vp, and give it back */
3426 if (ac->vp) {
3427 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3428 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3429 vput(ac->vp);
3430 ac->vp = NULL;
3431 }
3432 ac = ac->next;
3433 }
3434 }
3435
3436
3437 void
3438 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3439 {
3440 RF_AutoConfig_t *ac;
3441 RF_AutoConfig_t *next_ac;
3442
3443 ac = cset->ac;
3444 while(ac!=NULL) {
3445 next_ac = ac->next;
3446 /* nuke the label */
3447 free(ac->clabel, M_RAIDFRAME);
3448 /* cleanup the config structure */
3449 free(ac, M_RAIDFRAME);
3450 /* "next.." */
3451 ac = next_ac;
3452 }
3453 /* and, finally, nuke the config set */
3454 free(cset, M_RAIDFRAME);
3455 }
3456
3457
/*
 * Initialize "clabel" from the live state of "raidPtr": version,
 * serial/mod counters, geometry (columns, stripe-unit sizes, block
 * size, usable sector count) and the administrative flags
 * (autoconfigure, root_partition, last unit, config order).
 * The label is marked dirty and optimal; callers flush it to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Geometry: rows are always 1 in this implementation. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* Let the parity map layer add its own fields to the label. */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3490
/*
 * Bring up the RAID set described by "cset": build an RF_Config_t from
 * the component labels, find (or create) a softc at the unit number
 * the set last lived at, and run rf_Configure() on it.  Returns the
 * configured softc, or NULL on allocation or configuration failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk upward from last_unit until we hit a unit that is not
	   already configured (sc_r.valid == 0) or does not exist yet
	   (raidget(..., false) returns NULL). */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc at this unit yet; allocate one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1: /* Force Root */
		case 2: /* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; release the softc we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3574
3575 void
3576 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3577 size_t xmin, size_t xmax)
3578 {
3579 int error;
3580
3581 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3582 pool_sethiwat(p, xmax);
3583 if ((error = pool_prime(p, xmin)) != 0)
3584 panic("%s: failed to prime pool: %d", __func__, error);
3585 pool_setlowat(p, xmin);
3586 }
3587
3588 /*
3589 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3590 * to see if there is IO pending and if that IO could possibly be done
3591 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3592 * otherwise.
3593 *
3594 */
3595 int
3596 rf_buf_queue_check(RF_Raid_t *raidPtr)
3597 {
3598 struct raid_softc *rs;
3599 struct dk_softc *dksc;
3600
3601 rs = raidPtr->softc;
3602 dksc = &rs->sc_dksc;
3603
3604 if ((rs->sc_flags & RAIDF_INITED) == 0)
3605 return 1;
3606
3607 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3608 /* there is work to do */
3609 return 0;
3610 }
3611 /* default is nothing to do */
3612 return 1;
3613 }
3614
3615 int
3616 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3617 {
3618 uint64_t numsecs;
3619 unsigned secsize;
3620 int error;
3621
3622 error = getdisksize(vp, &numsecs, &secsize);
3623 if (error == 0) {
3624 diskPtr->blockSize = secsize;
3625 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3626 diskPtr->partitionSize = numsecs;
3627 return 0;
3628 }
3629 return error;
3630 }
3631
/*
 * Autoconf match: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3637
/*
 * Autoconf attach: nothing to do here; the real setup happens later
 * when a unit is configured (see raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3642
3643
3644 static int
3645 raid_detach(device_t self, int flags)
3646 {
3647 int error;
3648 struct raid_softc *rs = raidsoftc(self);
3649
3650 if (rs == NULL)
3651 return ENXIO;
3652
3653 if ((error = raidlock(rs)) != 0)
3654 return (error);
3655
3656 error = raid_detach_unlocked(rs);
3657
3658 raidunlock(rs);
3659
3660 /* XXX raid can be referenced here */
3661
3662 if (error)
3663 return error;
3664
3665 /* Free the softc */
3666 raidput(rs);
3667
3668 return 0;
3669 }
3670
/*
 * Publish a disk geometry for the RAID unit via disk_set_info().
 * The geometry is synthetic (a RAID set has no physical geometry):
 * only secperunit and secsize are real sizes; nsectors and ntracks
 * are fabricated values.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;	/* arbitrary choice */

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3686
3687 /*
3688 * Get cache info for all the components (including spares).
3689 * Returns intersection of all the cache flags of all disks, or first
3690 * error if any encountered.
3691 * XXXfua feature flags can change as spares are added - lock down somehow
3692 */
3693 static int
3694 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3695 {
3696 int c;
3697 int error;
3698 int dkwhole = 0, dkpart;
3699
3700 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3701 /*
3702 * Check any non-dead disk, even when currently being
3703 * reconstructed.
3704 */
3705 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
3706 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3707 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3708 if (error) {
3709 if (error != ENODEV) {
3710 printf("raid%d: get cache for component %s failed\n",
3711 raidPtr->raidid,
3712 raidPtr->Disks[c].devname);
3713 }
3714
3715 return error;
3716 }
3717
3718 if (c == 0)
3719 dkwhole = dkpart;
3720 else
3721 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3722 }
3723 }
3724
3725 *data = dkwhole;
3726
3727 return 0;
3728 }
3729
3730 /*
3731 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3732 * We end up returning whatever error was returned by the first cache flush
3733 * that fails.
3734 */
3735
3736 int
3737 rf_sync_component_caches(RF_Raid_t *raidPtr)
3738 {
3739 int c, sparecol;
3740 int e,error;
3741 int force = 1;
3742
3743 error = 0;
3744 for (c = 0; c < raidPtr->numCol; c++) {
3745 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3746 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3747 &force, FWRITE, NOCRED);
3748 if (e) {
3749 if (e != ENODEV)
3750 printf("raid%d: cache flush to component %s failed.\n",
3751 raidPtr->raidid, raidPtr->Disks[c].devname);
3752 if (error == 0) {
3753 error = e;
3754 }
3755 }
3756 }
3757 }
3758
3759 for( c = 0; c < raidPtr->numSpare ; c++) {
3760 sparecol = raidPtr->numCol + c;
3761 /* Need to ensure that the reconstruct actually completed! */
3762 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3763 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3764 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3765 if (e) {
3766 if (e != ENODEV)
3767 printf("raid%d: cache flush to component %s failed.\n",
3768 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3769 if (error == 0) {
3770 error = e;
3771 }
3772 }
3773 }
3774 }
3775 return error;
3776 }
3777
3778 /* Fill in info with the current status */
3779 void
3780 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3781 {
3782
3783 memset(info, 0, sizeof(*info));
3784
3785 if (raidPtr->status != rf_rs_reconstructing) {
3786 info->total = 100;
3787 info->completed = 100;
3788 } else {
3789 info->total = raidPtr->reconControl->numRUsTotal;
3790 info->completed = raidPtr->reconControl->numRUsComplete;
3791 }
3792 info->remaining = info->total - info->completed;
3793 }
3794
3795 /* Fill in info with the current status */
3796 void
3797 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3798 {
3799
3800 memset(info, 0, sizeof(*info));
3801
3802 if (raidPtr->parity_rewrite_in_progress == 1) {
3803 info->total = raidPtr->Layout.numStripe;
3804 info->completed = raidPtr->parity_rewrite_stripes_done;
3805 } else {
3806 info->completed = 100;
3807 info->total = 100;
3808 }
3809 info->remaining = info->total - info->completed;
3810 }
3811
3812 /* Fill in info with the current status */
3813 void
3814 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3815 {
3816
3817 memset(info, 0, sizeof(*info));
3818
3819 if (raidPtr->copyback_in_progress == 1) {
3820 info->total = raidPtr->Layout.numStripe;
3821 info->completed = raidPtr->copyback_stripes_done;
3822 info->remaining = info->total - info->completed;
3823 } else {
3824 info->remaining = 0;
3825 info->completed = 100;
3826 info->total = 100;
3827 }
3828 }
3829
3830 /* Fill in config with the current info */
3831 int
3832 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3833 {
3834 int d, i, j;
3835
3836 if (!raidPtr->valid)
3837 return (ENODEV);
3838 config->cols = raidPtr->numCol;
3839 config->ndevs = raidPtr->numCol;
3840 if (config->ndevs >= RF_MAX_DISKS)
3841 return (ENOMEM);
3842 config->nspares = raidPtr->numSpare;
3843 if (config->nspares >= RF_MAX_DISKS)
3844 return (ENOMEM);
3845 config->maxqdepth = raidPtr->maxQueueDepth;
3846 d = 0;
3847 for (j = 0; j < config->cols; j++) {
3848 config->devs[d] = raidPtr->Disks[j];
3849 d++;
3850 }
3851 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3852 config->spares[i] = raidPtr->Disks[j];
3853 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3854 /* XXX: raidctl(8) expects to see this as a used spare */
3855 config->spares[i].status = rf_ds_used_spare;
3856 }
3857 }
3858 return 0;
3859 }
3860
3861 int
3862 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3863 {
3864 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3865 RF_ComponentLabel_t *raid_clabel;
3866 int column = clabel->column;
3867
3868 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3869 return EINVAL;
3870 raid_clabel = raidget_component_label(raidPtr, column);
3871 memcpy(clabel, raid_clabel, sizeof *clabel);
3872
3873 return 0;
3874 }
3875
3876 /*
3877 * Module interface
3878 */
3879
3880 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3881
3882 #ifdef _MODULE
3883 CFDRIVER_DECL(raid, DV_DISK, NULL);
3884 #endif
3885
3886 static int raid_modcmd(modcmd_t, void *);
3887 static int raid_modcmd_init(void);
3888 static int raid_modcmd_fini(void);
3889
3890 static int
3891 raid_modcmd(modcmd_t cmd, void *data)
3892 {
3893 int error;
3894
3895 error = 0;
3896 switch (cmd) {
3897 case MODULE_CMD_INIT:
3898 error = raid_modcmd_init();
3899 break;
3900 case MODULE_CMD_FINI:
3901 error = raid_modcmd_fini();
3902 break;
3903 default:
3904 error = ENOTTY;
3905 break;
3906 }
3907 return error;
3908 }
3909
/*
 * Module load: create the global raid_lock, attach the block/character
 * device switch and the autoconf glue (rolling back on failure), boot
 * the RAIDframe engine, and register a finalizer that auto-configures
 * RAID sets once all real hardware devices have been found.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Let devsw_attach pick the major numbers. */
	bmajor = cmajor = -1;
	/* EEXIST is tolerated: the devsw may already be present
	   (e.g. when the driver is also compiled into the kernel). */
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attach. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back the cfdriver and devsw attaches. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: continue, just without autoconfiguration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3980
/*
 * Module unload: refuse while any raid unit exists, then detach the
 * cfattach, cfdriver and devsw in reverse order of attachment,
 * re-attaching the earlier pieces if a later detach fails.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: re-attach the cfattach removed above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back the cfdriver and cfattach detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe engine and tear down globals. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4030