rf_netbsdkintf.c revision 1.376.4.3 1 /* $NetBSD: rf_netbsdkintf.c,v 1.376.4.3 2022/08/12 15:18:13 martin Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.376.4.3 2022/08/12 15:18:13 martin Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "ioconf.h"
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #ifdef DEBUG_ROOT
162 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
163 #else
164 #define DPRINTF(a, ...)
165 #endif
166
167 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
168 static rf_declare_mutex2(rf_sparet_wait_mutex);
169 static rf_declare_cond2(rf_sparet_wait_cv);
170 static rf_declare_cond2(rf_sparet_resp_cv);
171
172 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173 * spare table */
174 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175 * installation process */
176 #endif
177
178 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
179
180 /* prototypes */
181 static void KernelWakeupFunc(struct buf *);
182 static void InitBP(struct buf *, struct vnode *, unsigned,
183 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
184 void *, int, struct proc *);
185 static void raidinit(struct raid_softc *);
186 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
187 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
188
189 static int raid_match(device_t, cfdata_t, void *);
190 static void raid_attach(device_t, device_t, void *);
191 static int raid_detach(device_t, int);
192
193 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
194 daddr_t, daddr_t);
195 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
196 daddr_t, daddr_t, int);
197
198 static int raidwrite_component_label(unsigned,
199 dev_t, struct vnode *, RF_ComponentLabel_t *);
200 static int raidread_component_label(unsigned,
201 dev_t, struct vnode *, RF_ComponentLabel_t *);
202
203 static int raid_diskstart(device_t, struct buf *bp);
204 static int raid_dumpblocks(device_t, void *, daddr_t, int);
205 static int raid_lastclose(device_t);
206
207 static dev_type_open(raidopen);
208 static dev_type_close(raidclose);
209 static dev_type_read(raidread);
210 static dev_type_write(raidwrite);
211 static dev_type_ioctl(raidioctl);
212 static dev_type_strategy(raidstrategy);
213 static dev_type_dump(raiddump);
214 static dev_type_size(raidsize);
215
/* Block-device switch: entry points for the raid block device (bdevsw(9)). */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
226
/* Character-device switch: raw (rraidN) entry points (cdevsw(9)). */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
241
/* Hooks handed to the common disk framework (dk_*) for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,	/* called by dksc to start queued I/O */
	.d_dumpblocks = raid_dumpblocks,	/* crash-dump path */
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
251
252 #define raidunit(x) DISKUNIT(x)
253 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
254
255 extern struct cfdriver raid_cd;
256 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
257 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
258 DVF_DETACH_SHUTDOWN);
259
/* Internal representation of a rf_recon_req: a kernel-owned copy of the
 * user's reconstruction request, handed to the recon thread so we never
 * depend on the user's buffer after the ioctl returns. */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request targets */
	RF_ReconReqFlags_t flags;	/* copied verbatim from the user request */
	void *raidPtr;			/* the RF_Raid_t this request applies to */
};
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even it if is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req_internal *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 sc->sc_unit = unit;
342 cv_init(&sc->sc_cv, "raidunit");
343 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
344 return sc;
345 }
346
347 static void
348 raiddestroy(struct raid_softc *sc) {
349 cv_destroy(&sc->sc_cv);
350 mutex_destroy(&sc->sc_mutex);
351 kmem_free(sc, sizeof(*sc));
352 }
353
354 static struct raid_softc *
355 raidget(int unit, bool create) {
356 struct raid_softc *sc;
357 if (unit < 0) {
358 #ifdef DIAGNOSTIC
359 panic("%s: unit %d!", __func__, unit);
360 #endif
361 return NULL;
362 }
363 mutex_enter(&raid_lock);
364 LIST_FOREACH(sc, &raids, sc_link) {
365 if (sc->sc_unit == unit) {
366 mutex_exit(&raid_lock);
367 return sc;
368 }
369 }
370 mutex_exit(&raid_lock);
371 if (!create)
372 return NULL;
373 if ((sc = raidcreate(unit)) == NULL)
374 return NULL;
375 mutex_enter(&raid_lock);
376 LIST_INSERT_HEAD(&raids, sc, sc_link);
377 mutex_exit(&raid_lock);
378 return sc;
379 }
380
381 static void
382 raidput(struct raid_softc *sc) {
383 mutex_enter(&raid_lock);
384 LIST_REMOVE(sc, sc_link);
385 mutex_exit(&raid_lock);
386 raiddestroy(sc);
387 }
388
/*
 * Legacy pseudo-device attach hook.  Intentionally empty: device
 * attachment and the associated initialization happen during module
 * initialization instead.
 */
void
raidattach(int num)
{
}
398
399 int
400 rf_autoconfig(device_t self)
401 {
402 RF_AutoConfig_t *ac_list;
403 RF_ConfigSet_t *config_sets;
404
405 if (!raidautoconfig || raidautoconfigdone == true)
406 return (0);
407
408 /* XXX This code can only be run once. */
409 raidautoconfigdone = true;
410
411 #ifdef __HAVE_CPU_BOOTCONF
412 /*
413 * 0. find the boot device if needed first so we can use it later
414 * this needs to be done before we autoconfigure any raid sets,
415 * because if we use wedges we are not going to be able to open
416 * the boot device later
417 */
418 if (booted_device == NULL)
419 cpu_bootconf();
420 #endif
421 /* 1. locate all RAID components on the system */
422 aprint_debug("Searching for RAID components...\n");
423 ac_list = rf_find_raid_components();
424
425 /* 2. Sort them into their respective sets. */
426 config_sets = rf_create_auto_sets(ac_list);
427
428 /*
429 * 3. Evaluate each set and configure the valid ones.
430 * This gets done in rf_buildroothack().
431 */
432 rf_buildroothack(config_sets);
433
434 return 1;
435 }
436
437 int
438 rf_inited(const struct raid_softc *rs) {
439 return (rs->sc_flags & RAIDF_INITED) != 0;
440 }
441
442 RF_Raid_t *
443 rf_get_raid(struct raid_softc *rs) {
444 return &rs->sc_r;
445 }
446
447 int
448 rf_get_unit(const struct raid_softc *rs) {
449 return rs->sc_unit;
450 }
451
452 static int
453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
454 const char *bootname;
455 size_t len;
456
457 /* if bdv is NULL, the set can't contain it. exit early. */
458 if (bdv == NULL)
459 return 0;
460
461 bootname = device_xname(bdv);
462 len = strlen(bootname);
463
464 for (int col = 0; col < r->numCol; col++) {
465 const char *devname = r->Disks[col].devname;
466 devname += sizeof("/dev/") - 1;
467 if (strncmp(devname, "dk", 2) == 0) {
468 const char *parent =
469 dkwedge_get_parent_name(r->Disks[col].dev);
470 if (parent != NULL)
471 devname = parent;
472 }
473 if (strncmp(devname, bootname, len) == 0) {
474 struct raid_softc *sc = r->softc;
475 aprint_debug("raid%d includes boot device %s\n",
476 sc->sc_unit, devname);
477 return 1;
478 }
479 }
480 return 0;
481 }
482
483 void
484 rf_buildroothack(RF_ConfigSet_t *config_sets)
485 {
486 RF_ConfigSet_t *cset;
487 RF_ConfigSet_t *next_cset;
488 int num_root;
489 struct raid_softc *sc, *rsc;
490 struct dk_softc *dksc;
491
492 sc = rsc = NULL;
493 num_root = 0;
494 cset = config_sets;
495 while (cset != NULL) {
496 next_cset = cset->next;
497 if (rf_have_enough_components(cset) &&
498 cset->ac->clabel->autoconfigure == 1) {
499 sc = rf_auto_config_set(cset);
500 if (sc != NULL) {
501 aprint_debug("raid%d: configured ok, rootable %d\n",
502 sc->sc_unit, cset->rootable);
503 if (cset->rootable) {
504 rsc = sc;
505 num_root++;
506 }
507 } else {
508 /* The autoconfig didn't work :( */
509 aprint_debug("Autoconfig failed\n");
510 rf_release_all_vps(cset);
511 }
512 } else {
513 /* we're not autoconfiguring this set...
514 release the associated resources */
515 rf_release_all_vps(cset);
516 }
517 /* cleanup */
518 rf_cleanup_config_set(cset);
519 cset = next_cset;
520 }
521 dksc = &rsc->sc_dksc;
522
523 /* if the user has specified what the root device should be
524 then we don't touch booted_device or boothowto... */
525
526 if (rootspec != NULL) {
527 DPRINTF("%s: rootspec %s\n", __func__, rootspec);
528 return;
529 }
530
531 /* we found something bootable... */
532
533 /*
534 * XXX: The following code assumes that the root raid
535 * is the first ('a') partition. This is about the best
536 * we can do with a BSD disklabel, but we might be able
537 * to do better with a GPT label, by setting a specified
538 * attribute to indicate the root partition. We can then
539 * stash the partition number in the r->root_partition
540 * high bits (the bottom 2 bits are already used). For
541 * now we just set booted_partition to 0 when we override
542 * root.
543 */
544 if (num_root == 1) {
545 device_t candidate_root;
546 if (dksc->sc_dkdev.dk_nwedges != 0) {
547 char cname[sizeof(cset->ac->devname)];
548 /* XXX: assume partition 'a' first */
549 snprintf(cname, sizeof(cname), "%s%c",
550 device_xname(dksc->sc_dev), 'a');
551 candidate_root = dkwedge_find_by_wname(cname);
552 DPRINTF("%s: candidate wedge root=%s\n", __func__,
553 cname);
554 if (candidate_root == NULL) {
555 /*
556 * If that is not found, because we don't use
557 * disklabel, return the first dk child
558 * XXX: we can skip the 'a' check above
559 * and always do this...
560 */
561 size_t i = 0;
562 candidate_root = dkwedge_find_by_parent(
563 device_xname(dksc->sc_dev), &i);
564 }
565 DPRINTF("%s: candidate wedge root=%p\n", __func__,
566 candidate_root);
567 } else
568 candidate_root = dksc->sc_dev;
569 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
570 DPRINTF("%s: booted_device=%p root_partition=%d "
571 "contains_boot=%d",
572 __func__, booted_device, rsc->sc_r.root_partition,
573 rf_containsboot(&rsc->sc_r, booted_device));
574 /* XXX the check for booted_device == NULL can probably be
575 * dropped, now that rf_containsboot handles that case.
576 */
577 if (booted_device == NULL ||
578 rsc->sc_r.root_partition == 1 ||
579 rf_containsboot(&rsc->sc_r, booted_device)) {
580 booted_device = candidate_root;
581 booted_method = "raidframe/single";
582 booted_partition = 0; /* XXX assume 'a' */
583 }
584 } else if (num_root > 1) {
585 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
586 booted_device);
587
588 /*
589 * Maybe the MD code can help. If it cannot, then
590 * setroot() will discover that we have no
591 * booted_device and will ask the user if nothing was
592 * hardwired in the kernel config file
593 */
594 if (booted_device == NULL)
595 return;
596
597 num_root = 0;
598 mutex_enter(&raid_lock);
599 LIST_FOREACH(sc, &raids, sc_link) {
600 RF_Raid_t *r = &sc->sc_r;
601 if (r->valid == 0)
602 continue;
603
604 if (r->root_partition == 0)
605 continue;
606
607 if (rf_containsboot(r, booted_device)) {
608 num_root++;
609 rsc = sc;
610 dksc = &rsc->sc_dksc;
611 }
612 }
613 mutex_exit(&raid_lock);
614
615 if (num_root == 1) {
616 booted_device = dksc->sc_dev;
617 booted_method = "raidframe/multi";
618 booted_partition = 0; /* XXX assume 'a' */
619 } else {
620 /* we can't guess.. require the user to answer... */
621 boothowto |= RB_ASKNAME;
622 }
623 }
624 }
625
626 static int
627 raidsize(dev_t dev)
628 {
629 struct raid_softc *rs;
630 struct dk_softc *dksc;
631 unsigned int unit;
632
633 unit = raidunit(dev);
634 if ((rs = raidget(unit, false)) == NULL)
635 return -1;
636 dksc = &rs->sc_dksc;
637
638 if ((rs->sc_flags & RAIDF_INITED) == 0)
639 return -1;
640
641 return dk_size(dksc, dev);
642 }
643
644 static int
645 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
646 {
647 unsigned int unit;
648 struct raid_softc *rs;
649 struct dk_softc *dksc;
650
651 unit = raidunit(dev);
652 if ((rs = raidget(unit, false)) == NULL)
653 return ENXIO;
654 dksc = &rs->sc_dksc;
655
656 if ((rs->sc_flags & RAIDF_INITED) == 0)
657 return ENODEV;
658
659 /*
660 Note that blkno is relative to this particular partition.
661 By adding adding RF_PROTECTED_SECTORS, we get a value that
662 is relative to the partition used for the underlying component.
663 */
664 blkno += RF_PROTECTED_SECTORS;
665
666 return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
667 }
668
/*
 * Write "nblk" blocks from "va" directly to one live component of a
 * RAID 1 set during a crash dump.  Only RAID 1 (1 data + 1 parity
 * column) is supported.  Picks the dump target in preference order:
 * master, spared master, slave, spared slave.  Returns 0 on success
 * or an errno (EINVAL if the layout is unsupported or no live
 * component exists, ENXIO if the target has no block device).
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	/* serialize against configuration changes while we dump */
	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one?  Find which column it spares. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	/* dispatch the dump to the chosen component's block driver */
	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
774
/* ARGSUSED */
/*
 * Open the raid device.  Creates the softc on first reference (so an
 * unconfigured unit can be opened and then configured via ioctl),
 * refuses opens once shutdown has begun, and marks all components
 * dirty on the first open of a configured set.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int     error = 0;
	int     part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* no new opens while the unit is being torn down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	/* an unconfigured unit "opens" successfully (error stays 0) so
	   that configuration ioctls can reach it */
	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
824
825 static int
826 raid_lastclose(device_t self)
827 {
828 struct raid_softc *rs = raidsoftc(self);
829
830 /* Last one... device is not unconfigured yet.
831 Device shutdown has taken care of setting the
832 clean bits if RAIDF_INITED is not set
833 mark things as clean... */
834
835 rf_update_component_labels(&rs->sc_r,
836 RF_FINAL_COMPONENT_UPDATE);
837
838 /* pass to unlocked code */
839 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
840 rs->sc_flags |= RAIDF_DETACH;
841
842 return 0;
843 }
844
/* ARGSUSED */
/*
 * Close the raid device.  After the dk-level close, a unit flagged
 * for shutdown is either detached (configured case, RAIDF_DETACH set
 * by raid_lastclose()) or its softc is simply freed (never-configured
 * case).  Both of those happen after the unit lock is dropped.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int     error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() sets RAIDF_DETACH when shutdown is pending */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
884
885 static void
886 raid_wakeup(RF_Raid_t *raidPtr)
887 {
888 rf_lock_mutex2(raidPtr->iodone_lock);
889 rf_signal_cond2(raidPtr->iodone_cv);
890 rf_unlock_mutex2(raidPtr->iodone_lock);
891 }
892
893 static void
894 raidstrategy(struct buf *bp)
895 {
896 unsigned int unit;
897 struct raid_softc *rs;
898 struct dk_softc *dksc;
899 RF_Raid_t *raidPtr;
900
901 unit = raidunit(bp->b_dev);
902 if ((rs = raidget(unit, false)) == NULL) {
903 bp->b_error = ENXIO;
904 goto fail;
905 }
906 if ((rs->sc_flags & RAIDF_INITED) == 0) {
907 bp->b_error = ENXIO;
908 goto fail;
909 }
910 dksc = &rs->sc_dksc;
911 raidPtr = &rs->sc_r;
912
913 /* Queue IO only */
914 if (dk_strategy_defer(dksc, bp))
915 goto done;
916
917 /* schedule the IO to happen at the next convenient time */
918 raid_wakeup(raidPtr);
919
920 done:
921 return;
922
923 fail:
924 bp->b_resid = bp->b_bcount;
925 biodone(bp);
926 }
927
928 static int
929 raid_diskstart(device_t dev, struct buf *bp)
930 {
931 struct raid_softc *rs = raidsoftc(dev);
932 RF_Raid_t *raidPtr;
933
934 raidPtr = &rs->sc_r;
935 if (!raidPtr->valid) {
936 db1_printf(("raid is not valid..\n"));
937 return ENODEV;
938 }
939
940 /* XXX */
941 bp->b_resid = 0;
942
943 return raiddoaccess(raidPtr, bp);
944 }
945
946 void
947 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
948 {
949 struct raid_softc *rs;
950 struct dk_softc *dksc;
951
952 rs = raidPtr->softc;
953 dksc = &rs->sc_dksc;
954
955 dk_done(dksc, bp);
956
957 rf_lock_mutex2(raidPtr->mutex);
958 raidPtr->openings++;
959 rf_unlock_mutex2(raidPtr->mutex);
960
961 /* schedule more IO */
962 raid_wakeup(raidPtr);
963 }
964
965 /* ARGSUSED */
966 static int
967 raidread(dev_t dev, struct uio *uio, int flags)
968 {
969 int unit = raidunit(dev);
970 struct raid_softc *rs;
971
972 if ((rs = raidget(unit, false)) == NULL)
973 return ENXIO;
974
975 if ((rs->sc_flags & RAIDF_INITED) == 0)
976 return (ENXIO);
977
978 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
979
980 }
981
982 /* ARGSUSED */
983 static int
984 raidwrite(dev_t dev, struct uio *uio, int flags)
985 {
986 int unit = raidunit(dev);
987 struct raid_softc *rs;
988
989 if ((rs = raidget(unit, false)) == NULL)
990 return ENXIO;
991
992 if ((rs->sc_flags & RAIDF_INITED) == 0)
993 return (ENXIO);
994
995 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
996
997 }
998
/*
 * Tear down a configured RAID set and detach its disk.  Refuses
 * (EBUSY) while any partition is open or a reconstruction, parity
 * rewrite, or copyback is in progress.  Does no locking itself --
 * NOTE(review): callers appear responsible for holding the unit lock
 * around this (hence "_unlocked"); confirm against the call sites.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* an unconfigured unit has nothing to shut down */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1036
1037 static bool
1038 rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
1039 {
1040 switch (cmd) {
1041 case RAIDFRAME_ADD_HOT_SPARE:
1042 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1043 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1044 case RAIDFRAME_CHECK_PARITY:
1045 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1046 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1047 case RAIDFRAME_CHECK_RECON_STATUS:
1048 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1049 case RAIDFRAME_COPYBACK:
1050 case RAIDFRAME_DELETE_COMPONENT:
1051 case RAIDFRAME_FAIL_DISK:
1052 case RAIDFRAME_GET_ACCTOTALS:
1053 case RAIDFRAME_GET_COMPONENT_LABEL:
1054 case RAIDFRAME_GET_INFO:
1055 case RAIDFRAME_GET_SIZE:
1056 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1057 case RAIDFRAME_INIT_LABELS:
1058 case RAIDFRAME_KEEP_ACCTOTALS:
1059 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1060 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1061 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1062 case RAIDFRAME_PARITYMAP_STATUS:
1063 case RAIDFRAME_REBUILD_IN_PLACE:
1064 case RAIDFRAME_REMOVE_HOT_SPARE:
1065 case RAIDFRAME_RESET_ACCTOTALS:
1066 case RAIDFRAME_REWRITEPARITY:
1067 case RAIDFRAME_SET_AUTOCONFIG:
1068 case RAIDFRAME_SET_COMPONENT_LABEL:
1069 case RAIDFRAME_SET_ROOT:
1070 return (rs->sc_flags & RAIDF_INITED) == 0;
1071 }
1072 return false;
1073 }
1074
/*
 * Administratively fail the component named in "rr" and start a
 * reconstruction thread for it.  Rejects (EINVAL) RAID 0 sets, bad
 * column numbers, sets already reconstructing, sets with another
 * failed component, and spared disks.  The request is copied into a
 * kernel-owned rf_recon_req_internal (freed by the recon thread) so
 * we never depend on the caller's buffer.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	/* drop the mutex before allocating/creating the thread */
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	/* all rejection paths above arrive here still holding the mutex */
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1123
1124 static int
1125 rf_copyinspecificbuf(RF_Config_t *k_cfg)
1126 {
1127 /* allocate a buffer for the layout-specific data, and copy it in */
1128 if (k_cfg->layoutSpecificSize == 0)
1129 return 0;
1130
1131 if (k_cfg->layoutSpecificSize > 10000) {
1132 /* sanity check */
1133 return EINVAL;
1134 }
1135
1136 u_char *specific_buf;
1137 specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1138 if (specific_buf == NULL)
1139 return ENOMEM;
1140
1141 int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142 k_cfg->layoutSpecificSize);
1143 if (retcode) {
1144 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1145 db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1146 return retcode;
1147 }
1148
1149 k_cfg->layoutSpecific = specific_buf;
1150 return 0;
1151 }
1152
1153 static int
1154 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1155 {
1156 RF_Config_t *u_cfg = *((RF_Config_t **) data);
1157
1158 if (rs->sc_r.valid) {
1159 /* There is a valid RAID set running on this unit! */
1160 printf("raid%d: Device already configured!\n", rs->sc_unit);
1161 return EINVAL;
1162 }
1163
1164 /* copy-in the configuration information */
1165 /* data points to a pointer to the configuration structure */
1166 *k_cfg = RF_Malloc(sizeof(**k_cfg));
1167 if (*k_cfg == NULL) {
1168 return ENOMEM;
1169 }
1170 int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1171 if (retcode == 0)
1172 return 0;
1173 RF_Free(*k_cfg, sizeof(RF_Config_t));
1174 db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1175 rs->sc_flags |= RAIDF_SHUTDOWN;
1176 return retcode;
1177 }
1178
/*
 * RAIDFRAME_CONFIGURE backend: configure the set described by k_cfg
 * on unit 'rs'.  Always consumes k_cfg (and its layout-specific
 * buffer); on any failure RAIDF_SHUTDOWN is set so the device is
 * detached when closed.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode, i;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	/* bring in the variable-length layout-specific data */
	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* Force nul-termination on all strings. */
#define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
	for (i = 0; i < RF_MAXCOL; i++) {
		/* only row 0 is used; rows are otherwise unsupported */
		ZERO_FINAL(k_cfg->devnames[0][i]);
	}
	for (i = 0; i < RF_MAXSPARE; i++) {
		ZERO_FINAL(k_cfg->spare_names[i]);
	}
	for (i = 0; i < RF_MAXDBGV; i++) {
		ZERO_FINAL(k_cfg->debugVars[i]);
	}
#undef ZERO_FINAL

	/* Check some basic limits. */
	if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
		retcode = EINVAL;
		goto out;
	}
	if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
		retcode = EINVAL;
		goto out;
	}

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers. No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
1254
#if RF_DISABLED
/*
 * RAIDFRAME_SET_COMPONENT_LABEL backend: copy a user-supplied label
 * over the in-core label for clabel->column and flush it to disk.
 * Currently compiled out (RF_DISABLED); validation of the label
 * contents is incomplete -- see the XXX comments below.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1293
1294 static int
1295 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1296 {
1297 /*
1298 we only want the serial number from
1299 the above. We get all the rest of the information
1300 from the config that was used to create this RAID
1301 set.
1302 */
1303
1304 raidPtr->serial_number = clabel->serial_number;
1305
1306 for (int column = 0; column < raidPtr->numCol; column++) {
1307 RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1308 if (RF_DEAD_DISK(diskPtr->status))
1309 continue;
1310 RF_ComponentLabel_t *ci_label = raidget_component_label(
1311 raidPtr, column);
1312 /* Zeroing this is important. */
1313 memset(ci_label, 0, sizeof(*ci_label));
1314 raid_init_component_label(raidPtr, ci_label);
1315 ci_label->serial_number = raidPtr->serial_number;
1316 ci_label->row = 0; /* we dont' pretend to support more */
1317 rf_component_label_set_partitionsize(ci_label,
1318 diskPtr->partitionSize);
1319 ci_label->column = column;
1320 raidflush_component_label(raidPtr, column);
1321 /* XXXjld what about the spares? */
1322 }
1323
1324 return 0;
1325 }
1326
/*
 * RAIDFRAME_REBUILD_IN_PLACE backend: rebuild the component named in
 * *componentPtr back onto its own column by spawning
 * rf_ReconstructInPlaceThread.  Rejected for RAID 0, while another
 * reconstruction is running, for out-of-range columns, and for
 * components whose current state makes the rebuild unsafe.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* local copy: don't rely on the caller's buffer */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		/* can't rebuild a spared component in place */
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	/* NOTE(review): as in rf_fail_disk(), rrint is leaked if thread
	 * creation fails -- consider freeing it on a non-zero return. */
	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1394
1395 static int
1396 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1397 {
1398 /*
1399 * This makes no sense on a RAID 0, or if we are not reconstructing
1400 * so tell the user it's done.
1401 */
1402 if (raidPtr->Layout.map->faultsTolerated == 0 ||
1403 raidPtr->status != rf_rs_reconstructing) {
1404 *data = 100;
1405 return 0;
1406 }
1407 if (raidPtr->reconControl->numRUsTotal == 0) {
1408 *data = 0;
1409 return 0;
1410 }
1411 *data = (raidPtr->reconControl->numRUsComplete * 100
1412 / raidPtr->reconControl->numRUsTotal);
1413 return 0;
1414 }
1415
1416 /*
1417 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
1418 * on the component_name[] array.
1419 */
1420 static void
1421 rf_copy_single_component(RF_SingleComponent_t *component, void *data)
1422 {
1423
1424 memcpy(component, data, sizeof *component);
1425 component->component_name[sizeof(component->component_name) - 1] = '\0';
1426 }
1427
1428 static int
1429 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1430 {
1431 int unit = raidunit(dev);
1432 int part, pmask;
1433 struct raid_softc *rs;
1434 struct dk_softc *dksc;
1435 RF_Config_t *k_cfg;
1436 RF_Raid_t *raidPtr;
1437 RF_AccTotals_t *totals;
1438 RF_SingleComponent_t component;
1439 RF_DeviceConfig_t *d_cfg, *ucfgp;
1440 int retcode = 0;
1441 int column;
1442 RF_ComponentLabel_t *clabel;
1443 int d;
1444
1445 if ((rs = raidget(unit, false)) == NULL)
1446 return ENXIO;
1447
1448 dksc = &rs->sc_dksc;
1449 raidPtr = &rs->sc_r;
1450
1451 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1452 (int) DISKPART(dev), (int) unit, cmd));
1453
1454 /* Must be initialized for these... */
1455 if (rf_must_be_initialized(rs, cmd))
1456 return ENXIO;
1457
1458 switch (cmd) {
1459 /* configure the system */
1460 case RAIDFRAME_CONFIGURE:
1461 if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1462 return retcode;
1463 return rf_construct(rs, k_cfg);
1464
1465 /* shutdown the system */
1466 case RAIDFRAME_SHUTDOWN:
1467
1468 part = DISKPART(dev);
1469 pmask = (1 << part);
1470
1471 if ((retcode = raidlock(rs)) != 0)
1472 return retcode;
1473
1474 if (DK_BUSY(dksc, pmask) ||
1475 raidPtr->recon_in_progress != 0 ||
1476 raidPtr->parity_rewrite_in_progress != 0 ||
1477 raidPtr->copyback_in_progress != 0)
1478 retcode = EBUSY;
1479 else {
1480 /* detach and free on close */
1481 rs->sc_flags |= RAIDF_SHUTDOWN;
1482 retcode = 0;
1483 }
1484
1485 raidunlock(rs);
1486
1487 return retcode;
1488 case RAIDFRAME_GET_COMPONENT_LABEL:
1489 return rf_get_component_label(raidPtr, data);
1490
1491 #if RF_DISABLED
1492 case RAIDFRAME_SET_COMPONENT_LABEL:
1493 return rf_set_component_label(raidPtr, data);
1494 #endif
1495
1496 case RAIDFRAME_INIT_LABELS:
1497 return rf_init_component_label(raidPtr, data);
1498
1499 case RAIDFRAME_SET_AUTOCONFIG:
1500 d = rf_set_autoconfig(raidPtr, *(int *) data);
1501 printf("raid%d: New autoconfig value is: %d\n",
1502 raidPtr->raidid, d);
1503 *(int *) data = d;
1504 return retcode;
1505
1506 case RAIDFRAME_SET_ROOT:
1507 d = rf_set_rootpartition(raidPtr, *(int *) data);
1508 printf("raid%d: New rootpartition value is: %d\n",
1509 raidPtr->raidid, d);
1510 *(int *) data = d;
1511 return retcode;
1512
1513 /* initialize all parity */
1514 case RAIDFRAME_REWRITEPARITY:
1515
1516 if (raidPtr->Layout.map->faultsTolerated == 0) {
1517 /* Parity for RAID 0 is trivially correct */
1518 raidPtr->parity_good = RF_RAID_CLEAN;
1519 return 0;
1520 }
1521
1522 if (raidPtr->parity_rewrite_in_progress == 1) {
1523 /* Re-write is already in progress! */
1524 return EINVAL;
1525 }
1526
1527 return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1528 rf_RewriteParityThread, raidPtr,"raid_parity");
1529
1530 case RAIDFRAME_ADD_HOT_SPARE:
1531 rf_copy_single_component(&component, data);
1532 return rf_add_hot_spare(raidPtr, &component);
1533
1534 case RAIDFRAME_REMOVE_HOT_SPARE:
1535 return retcode;
1536
1537 case RAIDFRAME_DELETE_COMPONENT:
1538 rf_copy_single_component(&component, data);
1539 return rf_delete_component(raidPtr, &component);
1540
1541 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1542 rf_copy_single_component(&component, data);
1543 return rf_incorporate_hot_spare(raidPtr, &component);
1544
1545 case RAIDFRAME_REBUILD_IN_PLACE:
1546 return rf_rebuild_in_place(raidPtr, data);
1547
1548 case RAIDFRAME_GET_INFO:
1549 ucfgp = *(RF_DeviceConfig_t **)data;
1550 d_cfg = RF_Malloc(sizeof(*d_cfg));
1551 if (d_cfg == NULL)
1552 return ENOMEM;
1553 retcode = rf_get_info(raidPtr, d_cfg);
1554 if (retcode == 0) {
1555 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1556 }
1557 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1558 return retcode;
1559
1560 case RAIDFRAME_CHECK_PARITY:
1561 *(int *) data = raidPtr->parity_good;
1562 return 0;
1563
1564 case RAIDFRAME_PARITYMAP_STATUS:
1565 if (rf_paritymap_ineligible(raidPtr))
1566 return EINVAL;
1567 rf_paritymap_status(raidPtr->parity_map, data);
1568 return 0;
1569
1570 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1571 if (rf_paritymap_ineligible(raidPtr))
1572 return EINVAL;
1573 if (raidPtr->parity_map == NULL)
1574 return ENOENT; /* ??? */
1575 if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1576 return EINVAL;
1577 return 0;
1578
1579 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1580 if (rf_paritymap_ineligible(raidPtr))
1581 return EINVAL;
1582 *(int *) data = rf_paritymap_get_disable(raidPtr);
1583 return 0;
1584
1585 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1586 if (rf_paritymap_ineligible(raidPtr))
1587 return EINVAL;
1588 rf_paritymap_set_disable(raidPtr, *(int *)data);
1589 /* XXX should errors be passed up? */
1590 return 0;
1591
1592 case RAIDFRAME_RESET_ACCTOTALS:
1593 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1594 return 0;
1595
1596 case RAIDFRAME_GET_ACCTOTALS:
1597 totals = (RF_AccTotals_t *) data;
1598 *totals = raidPtr->acc_totals;
1599 return 0;
1600
1601 case RAIDFRAME_KEEP_ACCTOTALS:
1602 raidPtr->keep_acc_totals = *(int *)data;
1603 return 0;
1604
1605 case RAIDFRAME_GET_SIZE:
1606 *(int *) data = raidPtr->totalSectors;
1607 return 0;
1608
1609 case RAIDFRAME_FAIL_DISK:
1610 return rf_fail_disk(raidPtr, data);
1611
1612 /* invoke a copyback operation after recon on whatever disk
1613 * needs it, if any */
1614 case RAIDFRAME_COPYBACK:
1615
1616 if (raidPtr->Layout.map->faultsTolerated == 0) {
1617 /* This makes no sense on a RAID 0!! */
1618 return EINVAL;
1619 }
1620
1621 if (raidPtr->copyback_in_progress == 1) {
1622 /* Copyback is already in progress! */
1623 return EINVAL;
1624 }
1625
1626 return RF_CREATE_THREAD(raidPtr->copyback_thread,
1627 rf_CopybackThread, raidPtr, "raid_copyback");
1628
1629 /* return the percentage completion of reconstruction */
1630 case RAIDFRAME_CHECK_RECON_STATUS:
1631 return rf_check_recon_status(raidPtr, data);
1632
1633 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1634 rf_check_recon_status_ext(raidPtr, data);
1635 return 0;
1636
1637 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1638 if (raidPtr->Layout.map->faultsTolerated == 0) {
1639 /* This makes no sense on a RAID 0, so tell the
1640 user it's done. */
1641 *(int *) data = 100;
1642 return 0;
1643 }
1644 if (raidPtr->parity_rewrite_in_progress == 1) {
1645 *(int *) data = 100 *
1646 raidPtr->parity_rewrite_stripes_done /
1647 raidPtr->Layout.numStripe;
1648 } else {
1649 *(int *) data = 100;
1650 }
1651 return 0;
1652
1653 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1654 rf_check_parityrewrite_status_ext(raidPtr, data);
1655 return 0;
1656
1657 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1658 if (raidPtr->Layout.map->faultsTolerated == 0) {
1659 /* This makes no sense on a RAID 0 */
1660 *(int *) data = 100;
1661 return 0;
1662 }
1663 if (raidPtr->copyback_in_progress == 1) {
1664 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1665 raidPtr->Layout.numStripe;
1666 } else {
1667 *(int *) data = 100;
1668 }
1669 return 0;
1670
1671 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1672 rf_check_copyback_status_ext(raidPtr, data);
1673 return 0;
1674
1675 case RAIDFRAME_SET_LAST_UNIT:
1676 for (column = 0; column < raidPtr->numCol; column++)
1677 if (raidPtr->Disks[column].status != rf_ds_optimal)
1678 return EBUSY;
1679
1680 for (column = 0; column < raidPtr->numCol; column++) {
1681 clabel = raidget_component_label(raidPtr, column);
1682 clabel->last_unit = *(int *)data;
1683 raidflush_component_label(raidPtr, column);
1684 }
1685 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1686 return 0;
1687
1688 /* the sparetable daemon calls this to wait for the kernel to
1689 * need a spare table. this ioctl does not return until a
1690 * spare table is needed. XXX -- calling mpsleep here in the
1691 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1692 * -- I should either compute the spare table in the kernel,
1693 * or have a different -- XXX XXX -- interface (a different
1694 * character device) for delivering the table -- XXX */
1695 #if RF_DISABLED
1696 case RAIDFRAME_SPARET_WAIT:
1697 rf_lock_mutex2(rf_sparet_wait_mutex);
1698 while (!rf_sparet_wait_queue)
1699 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1700 RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1701 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1702 rf_unlock_mutex2(rf_sparet_wait_mutex);
1703
1704 /* structure assignment */
1705 *((RF_SparetWait_t *) data) = *waitreq;
1706
1707 RF_Free(waitreq, sizeof(*waitreq));
1708 return 0;
1709
1710 /* wakes up a process waiting on SPARET_WAIT and puts an error
1711 * code in it that will cause the dameon to exit */
1712 case RAIDFRAME_ABORT_SPARET_WAIT:
1713 waitreq = RF_Malloc(sizeof(*waitreq));
1714 waitreq->fcol = -1;
1715 rf_lock_mutex2(rf_sparet_wait_mutex);
1716 waitreq->next = rf_sparet_wait_queue;
1717 rf_sparet_wait_queue = waitreq;
1718 rf_broadcast_cond2(rf_sparet_wait_cv);
1719 rf_unlock_mutex2(rf_sparet_wait_mutex);
1720 return 0;
1721
1722 /* used by the spare table daemon to deliver a spare table
1723 * into the kernel */
1724 case RAIDFRAME_SEND_SPARET:
1725
1726 /* install the spare table */
1727 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1728
1729 /* respond to the requestor. the return status of the spare
1730 * table installation is passed in the "fcol" field */
1731 waitred = RF_Malloc(sizeof(*waitreq));
1732 waitreq->fcol = retcode;
1733 rf_lock_mutex2(rf_sparet_wait_mutex);
1734 waitreq->next = rf_sparet_resp_queue;
1735 rf_sparet_resp_queue = waitreq;
1736 rf_broadcast_cond2(rf_sparet_resp_cv);
1737 rf_unlock_mutex2(rf_sparet_wait_mutex);
1738
1739 return retcode;
1740 #endif
1741 default:
1742 /*
1743 * Don't bother trying to load compat modules
1744 * if it is not our ioctl. This is more efficient
1745 * and makes rump tests not depend on compat code
1746 */
1747 if (IOCGROUP(cmd) != 'r')
1748 break;
1749 #ifdef _LP64
1750 if ((l->l_proc->p_flag & PK_32) != 0) {
1751 module_autoload("compat_netbsd32_raid",
1752 MODULE_CLASS_EXEC);
1753 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1754 (rs, cmd, data), enosys(), retcode);
1755 if (retcode != EPASSTHROUGH)
1756 return retcode;
1757 }
1758 #endif
1759 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1760 MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1761 (rs, cmd, data), enosys(), retcode);
1762 if (retcode != EPASSTHROUGH)
1763 return retcode;
1764
1765 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1766 MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1767 (rs, cmd, data), enosys(), retcode);
1768 if (retcode != EPASSTHROUGH)
1769 return retcode;
1770 break; /* fall through to the os-specific code below */
1771
1772 }
1773
1774 if (!raidPtr->valid)
1775 return (EINVAL);
1776
1777 /*
1778 * Add support for "regular" device ioctls here.
1779 */
1780
1781 switch (cmd) {
1782 case DIOCGCACHE:
1783 retcode = rf_get_component_caches(raidPtr, (int *)data);
1784 break;
1785
1786 case DIOCCACHESYNC:
1787 retcode = rf_sync_component_caches(raidPtr);
1788 break;
1789
1790 default:
1791 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1792 break;
1793 }
1794
1795 return (retcode);
1796
1797 }
1798
1799
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, hook the unit into the
   dk/disk frameworks, and probe for wedges. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* attach failed: RAIDF_INITED is deliberately not set */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}
1859
1860 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1861 /* wake up the daemon & tell it to get us a spare table
1862 * XXX
1863 * the entries in the queues should be tagged with the raidPtr
1864 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1866 * XXX
1867 *
1868 * XXX This code is not currently used. GO
1869 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* post the request where the daemon will find it, and wake it */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2() drops the mutex while we sleep, letting the
	 * daemon queue its response */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* the daemon's status is delivered in the fcol field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1893 #endif
1894
1895 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1896 * bp & passes it down.
1897 * any calls originating in the kernel must use non-blocking I/O
1898 * do some extra sanity checking to return "appropriate" error values for
1899 * certain conditions (to make some standard utilities work)
1900 *
1901 * Formerly known as: rf_DoAccessKernel
1902 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* The mutex is released around the label update;
		 * presumably rf_update_component_labels() may sleep or
		 * take other locks -- confirm. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* kick the dk queue to issue any pending buffers */
	dk_start(dksc, NULL);
}
1929
/*
 * Validate the request described by bp against the size of the set
 * and hand it to rf_DoAccess() as a non-blocking, asynchronous
 * access.  Returns EAGAIN when no openings are available, ENOSPC for
 * requests past the end of the set or not a whole number of sectors,
 * otherwise the result of rf_DoAccess().
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* no openings left: the request must wait */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
	    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb accounts for a trailing partial sector */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" forces this debug block on
	 * unconditionally; looks like a leftover -- confirm before
	 * removing. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int) raid_addr, (int) sum, (int) num_blocks,
		    (int) pb, (int) bp->b_resid));
	}
	/* reject past-the-end requests; the < comparisons catch
	 * arithmetic wrap-around in sum */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* request must be a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* consume one opening for the duration of this access */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
2002
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parens below suggest
		 * db1_printf() was intended rather than printf(). */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up bp to describe this request; KernelWakeupFunc()
		 * will run when the I/O completes */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
		    req->type, queue->raidPtr->raidid,
		    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
		    queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/*
 * Completion callback (installed as bp->b_iodone by InitBP()) for
 * I/Os issued via rf_DispatchKernelIO().  Records the per-request
 * error, marks the component failed on an I/O error when the set can
 * survive it, and hands the finished request to the raidio thread
 * via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2147
2148
2149 /*
2150 * initialize a buf structure for doing an I/O in the kernel.
2151 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	/* Transfer length in bytes: sector count scaled by sector size. */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Starting block, converted from sectors to DEV_BSIZE units. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* A zero-length transfer indicates a caller bug; fail loudly. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	/* completion callback and its argument, invoked from biodone() */
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2176
2177 /*
2178 * Wait interruptibly for an exclusive lock.
2179 *
2180 * XXX
2181 * Several drivers do this; it should be abstracted and made MP-safe.
2182 * (Hmm... where have we seen this warning before :-> GO )
2183 */
2184 static int
2185 raidlock(struct raid_softc *rs)
2186 {
2187 int error;
2188
2189 error = 0;
2190 mutex_enter(&rs->sc_mutex);
2191 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2192 rs->sc_flags |= RAIDF_WANTED;
2193 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2194 if (error != 0)
2195 goto done;
2196 }
2197 rs->sc_flags |= RAIDF_LOCKED;
2198 done:
2199 mutex_exit(&rs->sc_mutex);
2200 return (error);
2201 }
2202 /*
2203 * Unlock and wake up any waiters.
2204 */
2205 static void
2206 raidunlock(struct raid_softc *rs)
2207 {
2208
2209 mutex_enter(&rs->sc_mutex);
2210 rs->sc_flags &= ~RAIDF_LOCKED;
2211 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2212 rs->sc_flags &= ~RAIDF_WANTED;
2213 cv_broadcast(&rs->sc_cv);
2214 }
2215 mutex_exit(&rs->sc_mutex);
2216 }
2217
2218
2219 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2220 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2221 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2222
2223 static daddr_t
2224 rf_component_info_offset(void)
2225 {
2226
2227 return RF_COMPONENT_INFO_OFFSET;
2228 }
2229
2230 static daddr_t
2231 rf_component_info_size(unsigned secsize)
2232 {
2233 daddr_t info_size;
2234
2235 KASSERT(secsize);
2236 if (secsize > RF_COMPONENT_INFO_SIZE)
2237 info_size = secsize;
2238 else
2239 info_size = RF_COMPONENT_INFO_SIZE;
2240
2241 return info_size;
2242 }
2243
2244 static daddr_t
2245 rf_parity_map_offset(RF_Raid_t *raidPtr)
2246 {
2247 daddr_t map_offset;
2248
2249 KASSERT(raidPtr->bytesPerSector);
2250 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2251 map_offset = raidPtr->bytesPerSector;
2252 else
2253 map_offset = RF_COMPONENT_INFO_SIZE;
2254 map_offset += rf_component_info_offset();
2255
2256 return map_offset;
2257 }
2258
2259 static daddr_t
2260 rf_parity_map_size(RF_Raid_t *raidPtr)
2261 {
2262 daddr_t map_size;
2263
2264 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2265 map_size = raidPtr->bytesPerSector;
2266 else
2267 map_size = RF_PARITY_MAP_SIZE;
2268
2269 return map_size;
2270 }
2271
2272 int
2273 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2274 {
2275 RF_ComponentLabel_t *clabel;
2276
2277 clabel = raidget_component_label(raidPtr, col);
2278 clabel->clean = RF_RAID_CLEAN;
2279 raidflush_component_label(raidPtr, col);
2280 return(0);
2281 }
2282
2283
2284 int
2285 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2286 {
2287 RF_ComponentLabel_t *clabel;
2288
2289 clabel = raidget_component_label(raidPtr, col);
2290 clabel->clean = RF_RAID_DIRTY;
2291 raidflush_component_label(raidPtr, col);
2292 return(0);
2293 }
2294
/*
 * Read the on-disk component label for column `col` into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns the error from
 * raidread_component_label(), 0 on success.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2304
/* Return a pointer to the in-core component label for column `col`. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2310
2311 int
2312 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2313 {
2314 RF_ComponentLabel_t *label;
2315
2316 label = &raidPtr->raid_cinfo[col].ci_label;
2317 label->mod_counter = raidPtr->mod_counter;
2318 #ifndef RF_NO_PARITY_MAP
2319 label->parity_map_modcount = label->mod_counter;
2320 #endif
2321 return raidwrite_component_label(raidPtr->bytesPerSector,
2322 raidPtr->Disks[col].dev,
2323 raidPtr->raid_cinfo[col].ci_vp, label);
2324 }
2325
2326
/*
 * Read a component label from the fixed label area of a component.
 * Thin wrapper around raidread_component_area() with the standard
 * label offset/size for the given sector size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2336
2337 /* ARGSUSED */
/*
 * Synchronously read `dsize` bytes at byte offset `offset` from the
 * raw component device and copy the first `msize` bytes into `data`.
 * Returns 0 on success or the error from biowait().
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* issue the read and wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		/* only the first msize bytes are meaningful to the caller */
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2374
2375
/*
 * Write a component label to the fixed label area of a component.
 * Thin wrapper around raidwrite_component_area(); always synchronous
 * (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2385
2386 /* ARGSUSED */
/*
 * Write `msize` bytes from `data` (zero-padded to `dsize`) at byte
 * offset `offset` on the raw component device.  If `asyncp` is set the
 * write is issued B_ASYNC and 0 is returned immediately; otherwise the
 * routine waits for completion and returns the I/O error, if any.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* pad the label/map out to dsize with zeros before writing */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): async path returns without brelse();
		   presumably the buffer is released on completion of the
		   B_ASYNC write — confirm against buffercache(9). */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2420
2421 void
2422 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2423 {
2424 int c;
2425
2426 for (c = 0; c < raidPtr->numCol; c++) {
2427 /* Skip dead disks. */
2428 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2429 continue;
2430 /* XXXjld: what if an error occurs here? */
2431 raidwrite_component_area(raidPtr->Disks[c].dev,
2432 raidPtr->raid_cinfo[c].ci_vp, map,
2433 RF_PARITYMAP_NBYTE,
2434 rf_parity_map_offset(raidPtr),
2435 rf_parity_map_size(raidPtr), 0);
2436 }
2437 }
2438
2439 void
2440 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2441 {
2442 struct rf_paritymap_ondisk tmp;
2443 int c,first;
2444
2445 first=1;
2446 for (c = 0; c < raidPtr->numCol; c++) {
2447 /* Skip dead disks. */
2448 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2449 continue;
2450 raidread_component_area(raidPtr->Disks[c].dev,
2451 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2452 RF_PARITYMAP_NBYTE,
2453 rf_parity_map_offset(raidPtr),
2454 rf_parity_map_size(raidPtr));
2455 if (first) {
2456 memcpy(map, &tmp, sizeof(*map));
2457 first = 0;
2458 } else {
2459 rf_paritymap_merge(map, &tmp);
2460 }
2461 }
2462 }
2463
/*
 * Bump the set's modification counter and mark every accessible
 * component (and every in-use spare) dirty on disk.  Called when the
 * set goes into use so that an unclean shutdown is detectable.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2523
2524
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the modification counter, record the configured unit,
 * and — on a final update (RF_FINAL_COMPONENT_UPDATE) with good
 * parity — mark the components clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2602
2603 void
2604 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2605 {
2606
2607 if (vp != NULL) {
2608 if (auto_configured == 1) {
2609 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2610 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2611 vput(vp);
2612
2613 } else {
2614 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2615 }
2616 }
2617 }
2618
2619
2620 void
2621 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2622 {
2623 int r,c;
2624 struct vnode *vp;
2625 int acd;
2626
2627
2628 /* We take this opportunity to close the vnodes like we should.. */
2629
2630 for (c = 0; c < raidPtr->numCol; c++) {
2631 vp = raidPtr->raid_cinfo[c].ci_vp;
2632 acd = raidPtr->Disks[c].auto_configured;
2633 rf_close_component(raidPtr, vp, acd);
2634 raidPtr->raid_cinfo[c].ci_vp = NULL;
2635 raidPtr->Disks[c].auto_configured = 0;
2636 }
2637
2638 for (r = 0; r < raidPtr->numSpare; r++) {
2639 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2640 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2641 rf_close_component(raidPtr, vp, acd);
2642 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2643 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2644 }
2645 }
2646
2647
/*
 * Kernel thread body: fail the requested component and (optionally,
 * per RF_FDFLAGS_RECON) reconstruct it onto a spare.  Frees the
 * request and exits; never returns.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	/* advertise that a reconstruction is running on this set */
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2669
/*
 * Kernel thread body: rewrite all parity for the set.  On success the
 * set's parity is marked good (the clean bits are written at shutdown).
 * Wakes any thread blocked on parity_rewrite_cv, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2702
2703
/*
 * Kernel thread body: copy reconstructed data from in-use spares back
 * to their replaced components, then exit.  Never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2718
2719
/*
 * Kernel thread body: reconstruct a failed component in place (onto
 * the same disk), free the request, and exit.  Never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2737
/*
 * Probe an open component (vnode vp on device dev) for a RAIDframe
 * component label.  If a plausible label is found, prepend a new
 * RF_AutoConfig_t to ac_list (keeping vp open and ownership of it);
 * otherwise close vp and free the label.  Returns the (possibly
 * updated) list head, or NULL after freeing the whole list on OOM.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* OOM: tear down everything collected so far.
		       NOTE(review): vp is not closed on this path —
		       confirm callers cope with the leaked reference. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label here, release the component */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2795
/*
 * Scan all disk devices in the system for RAIDframe component labels
 * and return a list of candidate components for autoconfiguration.
 * Two passes: wedges first, then everything else, so a wedge covering
 * a whole disk wins over that disk's raw partition.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* wedge pass: only DKW_PTYPE_RAIDFRAME
				   wedges are interesting */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes ownership of vp */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3002
3003
3004 int
3005 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3006 {
3007
3008 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3009 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3010 ((clabel->clean == RF_RAID_CLEAN) ||
3011 (clabel->clean == RF_RAID_DIRTY)) &&
3012 clabel->row >=0 &&
3013 clabel->column >= 0 &&
3014 clabel->num_rows > 0 &&
3015 clabel->num_columns > 0 &&
3016 clabel->row < clabel->num_rows &&
3017 clabel->column < clabel->num_columns &&
3018 clabel->blockSize > 0 &&
3019 /*
3020 * numBlocksHi may contain garbage, but it is ok since
3021 * the type is unsigned. If it is really garbage,
3022 * rf_fix_old_label_size() will fix it.
3023 */
3024 rf_component_label_numblocks(clabel) > 0) {
3025 /*
3026 * label looks reasonable enough...
3027 * let's make sure it has no old garbage.
3028 */
3029 if (numsecs)
3030 rf_fix_old_label_size(clabel, numsecs);
3031 return(1);
3032 }
3033 return(0);
3034 }
3035
3036
3037 /*
3038 * For reasons yet unknown, some old component labels have garbage in
3039 * the newer numBlocksHi region, and this causes lossage. Since those
3040 * disks will also have numsecs set to less than 32 bits of sectors,
3041 * we can determine when this corruption has occurred, and fix it.
3042 *
3043 * The exact same problem, with the same unknown reason, happens to
3044 * the partitionSizeHi member as well.
3045 */
3046 static void
3047 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3048 {
3049
3050 if (numsecs < ((uint64_t)1 << 32)) {
3051 if (clabel->numBlocksHi) {
3052 printf("WARNING: total sectors < 32 bits, yet "
3053 "numBlocksHi set\n"
3054 "WARNING: resetting numBlocksHi to zero.\n");
3055 clabel->numBlocksHi = 0;
3056 }
3057
3058 if (clabel->partitionSizeHi) {
3059 printf("WARNING: total sectors < 32 bits, yet "
3060 "partitionSizeHi set\n"
3061 "WARNING: resetting partitionSizeHi to zero.\n");
3062 clabel->partitionSizeHi = 0;
3063 }
3064 }
3065 }
3066
3067
3068 #ifdef DEBUG
/* Dump a component label to the console (DEBUG kernels only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* index by root_partition & 3; last entry catches bad values */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3100 #endif
3101
3102 RF_ConfigSet_t *
3103 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3104 {
3105 RF_AutoConfig_t *ac;
3106 RF_ConfigSet_t *config_sets;
3107 RF_ConfigSet_t *cset;
3108 RF_AutoConfig_t *ac_next;
3109
3110
3111 config_sets = NULL;
3112
3113 /* Go through the AutoConfig list, and figure out which components
3114 belong to what sets. */
3115 ac = ac_list;
3116 while(ac!=NULL) {
3117 /* we're going to putz with ac->next, so save it here
3118 for use at the end of the loop */
3119 ac_next = ac->next;
3120
3121 if (config_sets == NULL) {
3122 /* will need at least this one... */
3123 config_sets = (RF_ConfigSet_t *)
3124 malloc(sizeof(RF_ConfigSet_t),
3125 M_RAIDFRAME, M_NOWAIT);
3126 if (config_sets == NULL) {
3127 panic("rf_create_auto_sets: No memory!");
3128 }
3129 /* this one is easy :) */
3130 config_sets->ac = ac;
3131 config_sets->next = NULL;
3132 config_sets->rootable = 0;
3133 ac->next = NULL;
3134 } else {
3135 /* which set does this component fit into? */
3136 cset = config_sets;
3137 while(cset!=NULL) {
3138 if (rf_does_it_fit(cset, ac)) {
3139 /* looks like it matches... */
3140 ac->next = cset->ac;
3141 cset->ac = ac;
3142 break;
3143 }
3144 cset = cset->next;
3145 }
3146 if (cset==NULL) {
3147 /* didn't find a match above... new set..*/
3148 cset = (RF_ConfigSet_t *)
3149 malloc(sizeof(RF_ConfigSet_t),
3150 M_RAIDFRAME, M_NOWAIT);
3151 if (cset == NULL) {
3152 panic("rf_create_auto_sets: No memory!");
3153 }
3154 cset->ac = ac;
3155 ac->next = NULL;
3156 cset->next = config_sets;
3157 cset->rootable = 0;
3158 config_sets = cset;
3159 }
3160 }
3161 ac = ac_next;
3162 }
3163
3164
3165 return(config_sets);
3166 }
3167
/*
 * Decide whether autoconfig component `ac` belongs to config set
 * `cset` by comparing its label against the set's first member.
 * Returns 1 if it fits, 0 otherwise.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}
3225
/*
 * Determine whether a config set has enough live, current components
 * to be configured.  Returns 1 if the set can be brought up, 0 if too
 * many components are missing for its RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the highest counter seen is authoritative; components with a
	   lower counter are stale (e.g. previously failed) */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (matching mod_counter) component
		   for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3328
/*
 * Build an RF_Config_t for autoconfiguration from the component
 * labels of set member `ac` (and its siblings on the list).  Device
 * names are placed at their label-recorded columns.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		/* NOTE(review): devname was built with strlcpy into a
		   same-sized buffer, so this strcpy presumably cannot
		   overflow devnames[0][col] — confirm the array sizes
		   match in rf_netbsd.h. */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3362
3363 int
3364 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3365 {
3366 RF_ComponentLabel_t *clabel;
3367 int column;
3368 int sparecol;
3369
3370 raidPtr->autoconfigure = new_value;
3371
3372 for(column=0; column<raidPtr->numCol; column++) {
3373 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3374 clabel = raidget_component_label(raidPtr, column);
3375 clabel->autoconfigure = new_value;
3376 raidflush_component_label(raidPtr, column);
3377 }
3378 }
3379 for(column = 0; column < raidPtr->numSpare ; column++) {
3380 sparecol = raidPtr->numCol + column;
3381 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3382 clabel = raidget_component_label(raidPtr, sparecol);
3383 clabel->autoconfigure = new_value;
3384 raidflush_component_label(raidPtr, sparecol);
3385 }
3386 }
3387 return(new_value);
3388 }
3389
3390 int
3391 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3392 {
3393 RF_ComponentLabel_t *clabel;
3394 int column;
3395 int sparecol;
3396
3397 raidPtr->root_partition = new_value;
3398 for(column=0; column<raidPtr->numCol; column++) {
3399 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3400 clabel = raidget_component_label(raidPtr, column);
3401 clabel->root_partition = new_value;
3402 raidflush_component_label(raidPtr, column);
3403 }
3404 }
3405 for(column = 0; column < raidPtr->numSpare ; column++) {
3406 sparecol = raidPtr->numCol + column;
3407 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3408 clabel = raidget_component_label(raidPtr, sparecol);
3409 clabel->root_partition = new_value;
3410 raidflush_component_label(raidPtr, sparecol);
3411 }
3412 }
3413 return(new_value);
3414 }
3415
3416 void
3417 rf_release_all_vps(RF_ConfigSet_t *cset)
3418 {
3419 RF_AutoConfig_t *ac;
3420
3421 ac = cset->ac;
3422 while(ac!=NULL) {
3423 /* Close the vp, and give it back */
3424 if (ac->vp) {
3425 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3426 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3427 vput(ac->vp);
3428 ac->vp = NULL;
3429 }
3430 ac = ac->next;
3431 }
3432 }
3433
3434
3435 void
3436 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3437 {
3438 RF_AutoConfig_t *ac;
3439 RF_AutoConfig_t *next_ac;
3440
3441 ac = cset->ac;
3442 while(ac!=NULL) {
3443 next_ac = ac->next;
3444 /* nuke the label */
3445 free(ac->clabel, M_RAIDFRAME);
3446 /* cleanup the config structure */
3447 free(ac, M_RAIDFRAME);
3448 /* "next.." */
3449 ac = next_ac;
3450 }
3451 /* and, finally, nuke the config set */
3452 free(cset, M_RAIDFRAME);
3453 }
3454
3455
/*
 * Initialize a component label from the current in-core state of the
 * RAID set.  The caller supplies the clabel storage; per-component
 * fields (column, partitionSize, ...) are not touched here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* Layout parameters, copied from the active configuration. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	/* Remember the unit number so autoconfig can prefer it later. */
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3488
/*
 * Configure a RAID set described by an autoconfig config set.
 * Allocates a unit (preferring the one recorded in the component
 * label), builds an RF_Config_t from the labels, and runs the normal
 * rf_Configure() path.  Returns the configured softc, or NULL on
 * failure (the softc is released via raidput() in that case).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk upward from last_unit until a free (unconfigured) unit. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing softc for this unit: create one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		/* This set arrived via autoconfig, so keep it that way. */
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the softc we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3572
3573 void
3574 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3575 size_t xmin, size_t xmax)
3576 {
3577 int error;
3578
3579 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3580 pool_sethiwat(p, xmax);
3581 if ((error = pool_prime(p, xmin)) != 0)
3582 panic("%s: failed to prime pool: %d", __func__, error);
3583 pool_setlowat(p, xmin);
3584 }
3585
3586 /*
3587 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3588 * to see if there is IO pending and if that IO could possibly be done
3589 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3590 * otherwise.
3591 *
3592 */
3593 int
3594 rf_buf_queue_check(RF_Raid_t *raidPtr)
3595 {
3596 struct raid_softc *rs;
3597 struct dk_softc *dksc;
3598
3599 rs = raidPtr->softc;
3600 dksc = &rs->sc_dksc;
3601
3602 if ((rs->sc_flags & RAIDF_INITED) == 0)
3603 return 1;
3604
3605 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3606 /* there is work to do */
3607 return 0;
3608 }
3609 /* default is nothing to do */
3610 return 1;
3611 }
3612
3613 int
3614 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3615 {
3616 uint64_t numsecs;
3617 unsigned secsize;
3618 int error;
3619
3620 error = getdisksize(vp, &numsecs, &secsize);
3621 if (error == 0) {
3622 diskPtr->blockSize = secsize;
3623 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3624 diskPtr->partitionSize = numsecs;
3625 return 0;
3626 }
3627 return error;
3628 }
3629
/*
 * raid devices are pseudo-devices instantiated on demand, so
 * autoconfiguration matching always succeeds.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3635
/*
 * Nothing to do at attach time; real initialization happens when a
 * set is configured (see raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3640
3641
3642 static int
3643 raid_detach(device_t self, int flags)
3644 {
3645 int error;
3646 struct raid_softc *rs = raidsoftc(self);
3647
3648 if (rs == NULL)
3649 return ENXIO;
3650
3651 if ((error = raidlock(rs)) != 0)
3652 return (error);
3653
3654 error = raid_detach_unlocked(rs);
3655
3656 raidunlock(rs);
3657
3658 /* XXX raid can be referenced here */
3659
3660 if (error)
3661 return error;
3662
3663 /* Free the softc */
3664 raidput(rs);
3665
3666 return 0;
3667 }
3668
/*
 * Publish a disk geometry for the RAID set to the disk(9) layer.
 * Only the unit size and sector size reflect reality; the
 * sectors/tracks breakdown is fabricated for consumers that expect
 * CHS-style values.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* Fake value: four tracks per column. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3684
3685 /*
3686 * Get cache info for all the components (including spares).
3687 * Returns intersection of all the cache flags of all disks, or first
3688 * error if any encountered.
3689 * XXXfua feature flags can change as spares are added - lock down somehow
3690 */
3691 static int
3692 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3693 {
3694 int c;
3695 int error;
3696 int dkwhole = 0, dkpart;
3697
3698 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3699 /*
3700 * Check any non-dead disk, even when currently being
3701 * reconstructed.
3702 */
3703 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3704 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3705 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3706 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3707 if (error) {
3708 if (error != ENODEV) {
3709 printf("raid%d: get cache for component %s failed\n",
3710 raidPtr->raidid,
3711 raidPtr->Disks[c].devname);
3712 }
3713
3714 return error;
3715 }
3716
3717 if (c == 0)
3718 dkwhole = dkpart;
3719 else
3720 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3721 }
3722 }
3723
3724 *data = dkwhole;
3725
3726 return 0;
3727 }
3728
3729 /*
3730 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3731 * We end up returning whatever error was returned by the first cache flush
3732 * that fails.
3733 */
3734
3735 int
3736 rf_sync_component_caches(RF_Raid_t *raidPtr)
3737 {
3738 int c, sparecol;
3739 int e,error;
3740 int force = 1;
3741
3742 error = 0;
3743 for (c = 0; c < raidPtr->numCol; c++) {
3744 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3745 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3746 &force, FWRITE, NOCRED);
3747 if (e) {
3748 if (e != ENODEV)
3749 printf("raid%d: cache flush to component %s failed.\n",
3750 raidPtr->raidid, raidPtr->Disks[c].devname);
3751 if (error == 0) {
3752 error = e;
3753 }
3754 }
3755 }
3756 }
3757
3758 for( c = 0; c < raidPtr->numSpare ; c++) {
3759 sparecol = raidPtr->numCol + c;
3760 /* Need to ensure that the reconstruct actually completed! */
3761 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3762 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3763 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3764 if (e) {
3765 if (e != ENODEV)
3766 printf("raid%d: cache flush to component %s failed.\n",
3767 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3768 if (error == 0) {
3769 error = e;
3770 }
3771 }
3772 }
3773 }
3774 return error;
3775 }
3776
3777 /* Fill in info with the current status */
3778 void
3779 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3780 {
3781
3782 memset(info, 0, sizeof(*info));
3783
3784 if (raidPtr->status != rf_rs_reconstructing) {
3785 info->total = 100;
3786 info->completed = 100;
3787 } else {
3788 info->total = raidPtr->reconControl->numRUsTotal;
3789 info->completed = raidPtr->reconControl->numRUsComplete;
3790 }
3791 info->remaining = info->total - info->completed;
3792 }
3793
3794 /* Fill in info with the current status */
3795 void
3796 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3797 {
3798
3799 memset(info, 0, sizeof(*info));
3800
3801 if (raidPtr->parity_rewrite_in_progress == 1) {
3802 info->total = raidPtr->Layout.numStripe;
3803 info->completed = raidPtr->parity_rewrite_stripes_done;
3804 } else {
3805 info->completed = 100;
3806 info->total = 100;
3807 }
3808 info->remaining = info->total - info->completed;
3809 }
3810
3811 /* Fill in info with the current status */
3812 void
3813 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3814 {
3815
3816 memset(info, 0, sizeof(*info));
3817
3818 if (raidPtr->copyback_in_progress == 1) {
3819 info->total = raidPtr->Layout.numStripe;
3820 info->completed = raidPtr->copyback_stripes_done;
3821 info->remaining = info->total - info->completed;
3822 } else {
3823 info->remaining = 0;
3824 info->completed = 100;
3825 info->total = 100;
3826 }
3827 }
3828
3829 /* Fill in config with the current info */
3830 int
3831 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3832 {
3833 int d, i, j;
3834
3835 if (!raidPtr->valid)
3836 return (ENODEV);
3837 config->cols = raidPtr->numCol;
3838 config->ndevs = raidPtr->numCol;
3839 if (config->ndevs >= RF_MAX_DISKS)
3840 return (ENOMEM);
3841 config->nspares = raidPtr->numSpare;
3842 if (config->nspares >= RF_MAX_DISKS)
3843 return (ENOMEM);
3844 config->maxqdepth = raidPtr->maxQueueDepth;
3845 d = 0;
3846 for (j = 0; j < config->cols; j++) {
3847 config->devs[d] = raidPtr->Disks[j];
3848 d++;
3849 }
3850 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3851 config->spares[i] = raidPtr->Disks[j];
3852 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3853 /* XXX: raidctl(8) expects to see this as a used spare */
3854 config->spares[i].status = rf_ds_used_spare;
3855 }
3856 }
3857 return 0;
3858 }
3859
3860 int
3861 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3862 {
3863 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3864 RF_ComponentLabel_t *raid_clabel;
3865 int column = clabel->column;
3866
3867 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3868 return EINVAL;
3869 raid_clabel = raidget_component_label(raidPtr, column);
3870 memcpy(clabel, raid_clabel, sizeof *clabel);
3871
3872 return 0;
3873 }
3874
3875 /*
3876 * Module interface
3877 */
3878
3879 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3880
3881 #ifdef _MODULE
3882 CFDRIVER_DECL(raid, DV_DISK, NULL);
3883 #endif
3884
3885 static int raid_modcmd(modcmd_t, void *);
3886 static int raid_modcmd_init(void);
3887 static int raid_modcmd_fini(void);
3888
3889 static int
3890 raid_modcmd(modcmd_t cmd, void *data)
3891 {
3892 int error;
3893
3894 error = 0;
3895 switch (cmd) {
3896 case MODULE_CMD_INIT:
3897 error = raid_modcmd_init();
3898 break;
3899 case MODULE_CMD_FINI:
3900 error = raid_modcmd_fini();
3901 break;
3902 default:
3903 error = ENOTTY;
3904 break;
3905 }
3906 return error;
3907 }
3908
/*
 * Module initialization: attach the block/character devsws, the
 * cfdriver (module build only) and cfattach, boot the RAIDframe
 * core, and register a finalizer that autoconfigures RAID sets once
 * all real hardware has been found.  On failure, earlier attach
 * steps are rolled back before returning the error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to allocate the majors dynamically. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST (devsw already attached, e.g. built-in) is tolerated. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attach. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back the cfdriver and devsw attaches. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* At this point error is 0 or EEXIST (from devsw_attach above). */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: sets just won't autoconfigure automatically. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3979
/*
 * Module teardown: refuse to unload while any raid softc exists,
 * then detach cfattach/cfdriver/devsw in the reverse order of
 * raid_modcmd_init(), re-attaching earlier pieces if a later detach
 * fails so the module stays in a consistent state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: restore the cfattach removed above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back: restore cfdriver and cfattach removed above. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Everything detached: shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4029