rf_netbsdkintf.c revision 1.395 1 /* $NetBSD: rf_netbsdkintf.c,v 1.395 2021/07/23 00:54:45 oster Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.395 2021/07/23 00:54:45 oster Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_raid_autoconfig.h"
108 #include "opt_compat_netbsd32.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130 #include <sys/compat_stub.h>
131
132 #include <prop/proplib.h>
133
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151
152 #include "ioconf.h"
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #ifdef DEBUG_ROOT
162 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
163 #else
164 #define DPRINTF(a, ...)
165 #endif
166
167 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
168 static rf_declare_mutex2(rf_sparet_wait_mutex);
169 static rf_declare_cond2(rf_sparet_wait_cv);
170 static rf_declare_cond2(rf_sparet_resp_cv);
171
172 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173 * spare table */
174 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175 * installation process */
176 #endif
177
178 const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
179
180 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf *);
184 static void InitBP(struct buf *, struct vnode *, unsigned,
185 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
186 void *, int);
187 static void raidinit(struct raid_softc *);
188 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
189 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
190
191 static int raid_match(device_t, cfdata_t, void *);
192 static void raid_attach(device_t, device_t, void *);
193 static int raid_detach(device_t, int);
194
195 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
196 daddr_t, daddr_t);
197 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
198 daddr_t, daddr_t, int);
199
200 static int raidwrite_component_label(unsigned,
201 dev_t, struct vnode *, RF_ComponentLabel_t *);
202 static int raidread_component_label(unsigned,
203 dev_t, struct vnode *, RF_ComponentLabel_t *);
204
205 static int raid_diskstart(device_t, struct buf *bp);
206 static int raid_dumpblocks(device_t, void *, daddr_t, int);
207 static int raid_lastclose(device_t);
208
209 static dev_type_open(raidopen);
210 static dev_type_close(raidclose);
211 static dev_type_read(raidread);
212 static dev_type_write(raidwrite);
213 static dev_type_ioctl(raidioctl);
214 static dev_type_strategy(raidstrategy);
215 static dev_type_dump(raiddump);
216 static dev_type_size(raidsize);
217
218 const struct bdevsw raid_bdevsw = {
219 .d_open = raidopen,
220 .d_close = raidclose,
221 .d_strategy = raidstrategy,
222 .d_ioctl = raidioctl,
223 .d_dump = raiddump,
224 .d_psize = raidsize,
225 .d_discard = nodiscard,
226 .d_flag = D_DISK
227 };
228
229 const struct cdevsw raid_cdevsw = {
230 .d_open = raidopen,
231 .d_close = raidclose,
232 .d_read = raidread,
233 .d_write = raidwrite,
234 .d_ioctl = raidioctl,
235 .d_stop = nostop,
236 .d_tty = notty,
237 .d_poll = nopoll,
238 .d_mmap = nommap,
239 .d_kqfilter = nokqfilter,
240 .d_discard = nodiscard,
241 .d_flag = D_DISK
242 };
243
244 static struct dkdriver rf_dkdriver = {
245 .d_open = raidopen,
246 .d_close = raidclose,
247 .d_strategy = raidstrategy,
248 .d_diskstart = raid_diskstart,
249 .d_dumpblocks = raid_dumpblocks,
250 .d_lastclose = raid_lastclose,
251 .d_minphys = minphys
252 };
253
254 #define raidunit(x) DISKUNIT(x)
255 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /* Internal representation of a rf_recon_req */
263 struct rf_recon_req_internal {
264 RF_RowCol_t col;
265 RF_ReconReqFlags_t flags;
266 void *raidPtr;
267 };
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294
295 static int raidlock(struct raid_softc *);
296 static void raidunlock(struct raid_softc *);
297
298 static int raid_detach_unlocked(struct raid_softc *);
299
300 static void rf_markalldirty(RF_Raid_t *);
301 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
302
303 static void rf_ReconThread(struct rf_recon_req_internal *);
304 static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
305 static void rf_CopybackThread(RF_Raid_t *raidPtr);
306 static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
307 static int rf_autoconfig(device_t);
308 static void rf_buildroothack(RF_ConfigSet_t *);
309
310 static RF_AutoConfig_t *rf_find_raid_components(void);
311 static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
312 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
313 static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
314 static int rf_set_autoconfig(RF_Raid_t *, int);
315 static int rf_set_rootpartition(RF_Raid_t *, int);
316 static void rf_release_all_vps(RF_ConfigSet_t *);
317 static void rf_cleanup_config_set(RF_ConfigSet_t *);
318 static int rf_have_enough_components(RF_ConfigSet_t *);
319 static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
320 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
321
322 /*
323 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
324 * Note that this is overridden by having RAID_AUTOCONFIG as an option
325 * in the kernel config file.
326 */
327 #ifdef RAID_AUTOCONFIG
328 int raidautoconfig = 1;
329 #else
330 int raidautoconfig = 0;
331 #endif
332 static bool raidautoconfigdone = false;
333
334 struct pool rf_alloclist_pool; /* AllocList */
335
336 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
337 static kmutex_t raid_lock;
338
339 static struct raid_softc *
340 raidcreate(int unit) {
341 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
342 sc->sc_unit = unit;
343 cv_init(&sc->sc_cv, "raidunit");
344 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
345 return sc;
346 }
347
348 static void
349 raiddestroy(struct raid_softc *sc) {
350 cv_destroy(&sc->sc_cv);
351 mutex_destroy(&sc->sc_mutex);
352 kmem_free(sc, sizeof(*sc));
353 }
354
355 static struct raid_softc *
356 raidget(int unit, bool create) {
357 struct raid_softc *sc;
358 if (unit < 0) {
359 #ifdef DIAGNOSTIC
360 panic("%s: unit %d!", __func__, unit);
361 #endif
362 return NULL;
363 }
364 mutex_enter(&raid_lock);
365 LIST_FOREACH(sc, &raids, sc_link) {
366 if (sc->sc_unit == unit) {
367 mutex_exit(&raid_lock);
368 return sc;
369 }
370 }
371 mutex_exit(&raid_lock);
372 if (!create)
373 return NULL;
374 sc = raidcreate(unit);
375 mutex_enter(&raid_lock);
376 LIST_INSERT_HEAD(&raids, sc, sc_link);
377 mutex_exit(&raid_lock);
378 return sc;
379 }
380
381 static void
382 raidput(struct raid_softc *sc) {
383 mutex_enter(&raid_lock);
384 LIST_REMOVE(sc, sc_link);
385 mutex_exit(&raid_lock);
386 raiddestroy(sc);
387 }
388
/*
 * Historical pseudo-device attach hook, invoked with the count from
 * the "pseudo-device raid N" kernel config directive.  Intentionally
 * empty; see the comment below.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
398
/*
 * Kick off RAID autoconfiguration: scan all disks for RAIDframe
 * component labels, group the components into configuration sets,
 * and configure the valid ones (possibly electing a root device) in
 * rf_buildroothack().
 *
 * Runs at most once per boot (guarded by raidautoconfigdone) and
 * only when enabled via `raidautoconfig'.  Returns 1 if a scan was
 * performed, 0 if autoconfiguration was skipped.
 */
static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
436
437 int
438 rf_inited(const struct raid_softc *rs) {
439 return (rs->sc_flags & RAIDF_INITED) != 0;
440 }
441
/* Return a pointer to the RAIDframe state embedded in this softc. */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
446
/* Return the unit number of this RAID software state. */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
451
/*
 * Return non-zero if RAID set `r' contains device `bdv' (typically
 * booted_device) among its components.  Device names are compared;
 * a "dk" wedge component is first resolved to its parent disk.
 *
 * NOTE(review): the match uses strncmp() over strlen(bootname)
 * characters, i.e. a prefix match — bootname "wd1" would also match
 * a component on "wd10".  Presumably acceptable in practice, but
 * worth confirming.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the "/dev/" prefix stored in the component name */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge component: compare the parent disk's name */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
482
/*
 * Walk the list of autoconfiguration sets: configure every set that
 * has enough components and autoconfigure enabled, then decide
 * whether one of the configured sets should supply the root device.
 * Sets that are not configured have their vnode resources released.
 *
 * If the user hardwired root (rootspec != NULL) booted_device and
 * boothowto are left alone.  With exactly one rootable set we may
 * override booted_device; with several candidates we try to match
 * the MD-discovered boot device against each set, and if that fails
 * we fall back to asking the user (RB_ASKNAME).
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok, rootable %d\n",
				    sc->sc_unit, cset->rootable);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		DPRINTF("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
			"contains_boot=%d",
		    __func__, booted_device, rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
627
628 static int
629 raidsize(dev_t dev)
630 {
631 struct raid_softc *rs;
632 struct dk_softc *dksc;
633 unsigned int unit;
634
635 unit = raidunit(dev);
636 if ((rs = raidget(unit, false)) == NULL)
637 return -1;
638 dksc = &rs->sc_dksc;
639
640 if ((rs->sc_flags & RAIDF_INITED) == 0)
641 return -1;
642
643 return dk_size(dksc, dev);
644 }
645
/*
 * bdevsw d_dump entry point for kernel crash dumps.  Translates the
 * partition-relative block number into a component-relative one and
 * hands the work to dk_dump(), which in turn calls raid_dumpblocks().
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}
670
/*
 * dkdriver d_dumpblocks hook: write `nblk' blocks from `va' at block
 * `blkno' to a single live component of a RAID 1 set during a kernel
 * crash dump.  Only two-column (1 data + 1 parity) sets are
 * supported; the best available component (or used spare) is picked
 * and its bdevsw d_dump routine invoked directly.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* dump directly through the component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
776
/*
 * Block/character device open.  Creates the softc on first reference
 * (raidget with create=true), refuses units that are shutting down,
 * marks components dirty on the first open of a configured set, and
 * defers the rest of the work to dk_open().
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;
}
826
/*
 * dkdriver d_lastclose hook: runs when the final open reference to
 * the unit goes away.  Writes final ("clean") component labels and,
 * if a shutdown was requested while the device was open, flags the
 * unit for detach so raidclose() can finish the job.
 */
static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}
846
/*
 * Block/character device close.  Closes through dk_close() when the
 * unit is configured, then performs any deferred work outside the
 * unit lock: config_detach() if raid_lastclose() flagged a detach,
 * or freeing the softc of a never-configured unit being shut down.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* detach/put must happen without holding the unit lock */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;
}
886
/*
 * Wake the RAIDframe engine: signal iodone_cv under iodone_lock so
 * a sleeping worker re-examines its queues for new work.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
894
895 static void
896 raidstrategy(struct buf *bp)
897 {
898 unsigned int unit;
899 struct raid_softc *rs;
900 struct dk_softc *dksc;
901 RF_Raid_t *raidPtr;
902
903 unit = raidunit(bp->b_dev);
904 if ((rs = raidget(unit, false)) == NULL) {
905 bp->b_error = ENXIO;
906 goto fail;
907 }
908 if ((rs->sc_flags & RAIDF_INITED) == 0) {
909 bp->b_error = ENXIO;
910 goto fail;
911 }
912 dksc = &rs->sc_dksc;
913 raidPtr = &rs->sc_r;
914
915 /* Queue IO only */
916 if (dk_strategy_defer(dksc, bp))
917 goto done;
918
919 /* schedule the IO to happen at the next convenient time */
920 raid_wakeup(raidPtr);
921
922 done:
923 return;
924
925 fail:
926 bp->b_resid = bp->b_bcount;
927 biodone(bp);
928 }
929
930 static int
931 raid_diskstart(device_t dev, struct buf *bp)
932 {
933 struct raid_softc *rs = raidsoftc(dev);
934 RF_Raid_t *raidPtr;
935
936 raidPtr = &rs->sc_r;
937 if (!raidPtr->valid) {
938 db1_printf(("raid is not valid..\n"));
939 return ENODEV;
940 }
941
942 /* XXX */
943 bp->b_resid = 0;
944
945 return raiddoaccess(raidPtr, bp);
946 }
947
948 void
949 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
950 {
951 struct raid_softc *rs;
952 struct dk_softc *dksc;
953
954 rs = raidPtr->softc;
955 dksc = &rs->sc_dksc;
956
957 dk_done(dksc, bp);
958
959 rf_lock_mutex2(raidPtr->mutex);
960 raidPtr->openings++;
961 rf_unlock_mutex2(raidPtr->mutex);
962
963 /* schedule more IO */
964 raid_wakeup(raidPtr);
965 }
966
967 /* ARGSUSED */
968 static int
969 raidread(dev_t dev, struct uio *uio, int flags)
970 {
971 int unit = raidunit(dev);
972 struct raid_softc *rs;
973
974 if ((rs = raidget(unit, false)) == NULL)
975 return ENXIO;
976
977 if ((rs->sc_flags & RAIDF_INITED) == 0)
978 return ENXIO;
979
980 return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
981
982 }
983
984 /* ARGSUSED */
985 static int
986 raidwrite(dev_t dev, struct uio *uio, int flags)
987 {
988 int unit = raidunit(dev);
989 struct raid_softc *rs;
990
991 if ((rs = raidget(unit, false)) == NULL)
992 return ENXIO;
993
994 if ((rs->sc_flags & RAIDF_INITED) == 0)
995 return ENXIO;
996
997 return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
998
999 }
1000
/*
 * Tear down a configured RAID set; the caller must hold the unit
 * lock (raidlock()).  Refuses with EBUSY while any partition is open
 * or a reconstruction, parity rewrite or copyback is in progress.
 * On success the RAIDframe engine is shut down, queued buffers are
 * drained and the disk(9) attachment is destroyed.  The softc itself
 * is not freed here.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* busy: open partitions or background operations running */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* NOTE(review): the shutdown flag is cleared unconditionally —
	 * presumably so a failed rf_Shutdown() below leaves the unit
	 * usable rather than wedged; confirm against the callers that
	 * set RAIDF_SHUTDOWN. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1038
1039 static bool
1040 rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
1041 {
1042 switch (cmd) {
1043 case RAIDFRAME_ADD_HOT_SPARE:
1044 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1045 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1046 case RAIDFRAME_CHECK_PARITY:
1047 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1048 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1049 case RAIDFRAME_CHECK_RECON_STATUS:
1050 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1051 case RAIDFRAME_COPYBACK:
1052 case RAIDFRAME_DELETE_COMPONENT:
1053 case RAIDFRAME_FAIL_DISK:
1054 case RAIDFRAME_GET_ACCTOTALS:
1055 case RAIDFRAME_GET_COMPONENT_LABEL:
1056 case RAIDFRAME_GET_INFO:
1057 case RAIDFRAME_GET_SIZE:
1058 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1059 case RAIDFRAME_INIT_LABELS:
1060 case RAIDFRAME_KEEP_ACCTOTALS:
1061 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1062 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1063 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1064 case RAIDFRAME_PARITYMAP_STATUS:
1065 case RAIDFRAME_REBUILD_IN_PLACE:
1066 case RAIDFRAME_REMOVE_HOT_SPARE:
1067 case RAIDFRAME_RESET_ACCTOTALS:
1068 case RAIDFRAME_REWRITEPARITY:
1069 case RAIDFRAME_SET_AUTOCONFIG:
1070 case RAIDFRAME_SET_COMPONENT_LABEL:
1071 case RAIDFRAME_SET_ROOT:
1072 return (rs->sc_flags & RAIDF_INITED) == 0;
1073 }
1074 return false;
1075 }
1076
/*
 * Mark component `rr->col' as failed and start a reconstruction
 * thread for it.  Rejects the request for RAID 0 sets (nothing to
 * reconstruct from), out-of-range columns, while a reconstruction is
 * already running, when another component has already failed, or
 * when the target disk is itself spared.  The request is copied into
 * kernel memory so the reconstruction thread does not depend on the
 * caller's buffer.  Returns 0 on success, else EINVAL or ENOMEM.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1125
1126 static int
1127 rf_copyinspecificbuf(RF_Config_t *k_cfg)
1128 {
1129 /* allocate a buffer for the layout-specific data, and copy it in */
1130 if (k_cfg->layoutSpecificSize == 0)
1131 return 0;
1132
1133 if (k_cfg->layoutSpecificSize > 10000) {
1134 /* sanity check */
1135 return EINVAL;
1136 }
1137
1138 u_char *specific_buf;
1139 specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1140 if (specific_buf == NULL)
1141 return ENOMEM;
1142
1143 int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1144 k_cfg->layoutSpecificSize);
1145 if (retcode) {
1146 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1147 db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1148 return retcode;
1149 }
1150
1151 k_cfg->layoutSpecific = specific_buf;
1152 return 0;
1153 }
1154
1155 static int
1156 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1157 {
1158 RF_Config_t *u_cfg = *((RF_Config_t **) data);
1159
1160 if (rs->sc_r.valid) {
1161 /* There is a valid RAID set running on this unit! */
1162 printf("raid%d: Device already configured!\n", rs->sc_unit);
1163 return EINVAL;
1164 }
1165
1166 /* copy-in the configuration information */
1167 /* data points to a pointer to the configuration structure */
1168 *k_cfg = RF_Malloc(sizeof(**k_cfg));
1169 if (*k_cfg == NULL) {
1170 return ENOMEM;
1171 }
1172 int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1173 if (retcode == 0)
1174 return 0;
1175 RF_Free(*k_cfg, sizeof(RF_Config_t));
1176 db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1177 rs->sc_flags |= RAIDF_SHUTDOWN;
1178 return retcode;
1179 }
1180
1181 int
1182 rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
1183 {
1184 int retcode;
1185 RF_Raid_t *raidPtr = &rs->sc_r;
1186
1187 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1188
1189 if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
1190 goto out;
1191
1192 /* should do some kind of sanity check on the configuration.
1193 * Store the sum of all the bytes in the last byte? */
1194
1195 /* configure the system */
1196
1197 /*
1198 * Clear the entire RAID descriptor, just to make sure
1199 * there is no stale data left in the case of a
1200 * reconfiguration
1201 */
1202 memset(raidPtr, 0, sizeof(*raidPtr));
1203 raidPtr->softc = rs;
1204 raidPtr->raidid = rs->sc_unit;
1205
1206 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1207
1208 if (retcode == 0) {
1209 /* allow this many simultaneous IO's to
1210 this RAID device */
1211 raidPtr->openings = RAIDOUTSTANDING;
1212
1213 raidinit(rs);
1214 raid_wakeup(raidPtr);
1215 rf_markalldirty(raidPtr);
1216 }
1217
1218 /* free the buffers. No return code here. */
1219 if (k_cfg->layoutSpecificSize) {
1220 RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
1221 }
1222 out:
1223 RF_Free(k_cfg, sizeof(RF_Config_t));
1224 if (retcode) {
1225 /*
1226 * If configuration failed, set sc_flags so that we
1227 * will detach the device when we close it.
1228 */
1229 rs->sc_flags |= RAIDF_SHUTDOWN;
1230 }
1231 return retcode;
1232 }
1233
#if RF_DISABLED
/*
 * rf_set_component_label: copy a user-supplied component label over the
 * in-core label for its column and flush it to the component.
 * Currently compiled out (RF_DISABLED); only the column is validated.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	/* Rows are not supported; force row 0 regardless of input. */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1272
1273 static int
1274 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1275 {
1276 /*
1277 we only want the serial number from
1278 the above. We get all the rest of the information
1279 from the config that was used to create this RAID
1280 set.
1281 */
1282
1283 raidPtr->serial_number = clabel->serial_number;
1284
1285 for (int column = 0; column < raidPtr->numCol; column++) {
1286 RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1287 if (RF_DEAD_DISK(diskPtr->status))
1288 continue;
1289 RF_ComponentLabel_t *ci_label = raidget_component_label(
1290 raidPtr, column);
1291 /* Zeroing this is important. */
1292 memset(ci_label, 0, sizeof(*ci_label));
1293 raid_init_component_label(raidPtr, ci_label);
1294 ci_label->serial_number = raidPtr->serial_number;
1295 ci_label->row = 0; /* we dont' pretend to support more */
1296 rf_component_label_set_partitionsize(ci_label,
1297 diskPtr->partitionSize);
1298 ci_label->column = column;
1299 raidflush_component_label(raidPtr, column);
1300 /* XXXjld what about the spares? */
1301 }
1302
1303 return 0;
1304 }
1305
/*
 * rf_rebuild_in_place: rebuild the component at componentPtr->column
 * onto itself (in place), by starting rf_ReconstructInPlaceThread.
 * Fails with EINVAL on RAID 0, an out-of-range column, an in-progress
 * reconstruction, too many existing failures, or a spared component.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Copy the request locally; don't trust the caller's buffer. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* Component status checks must happen under the mutex. */
	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		       raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:    Col: %d   "
		       "Reconstruction already occurring!\n",
		       raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		/* Can't rebuild a spared component in place. */
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* Request record handed off to the reconstruction thread. */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	/*
	 * NOTE(review): unlike rf_fail_disk, rrint->flags is never
	 * assigned here — presumably RF_Malloc zero-fills; confirm.
	 */
	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1373
1374 static int
1375 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1376 {
1377 /*
1378 * This makes no sense on a RAID 0, or if we are not reconstructing
1379 * so tell the user it's done.
1380 */
1381 if (raidPtr->Layout.map->faultsTolerated == 0 ||
1382 raidPtr->status != rf_rs_reconstructing) {
1383 *data = 100;
1384 return 0;
1385 }
1386 if (raidPtr->reconControl->numRUsTotal == 0) {
1387 *data = 0;
1388 return 0;
1389 }
1390 *data = (raidPtr->reconControl->numRUsComplete * 100
1391 / raidPtr->reconControl->numRUsTotal);
1392 return 0;
1393 }
1394
1395 static int
1396 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1397 {
1398 int unit = raidunit(dev);
1399 int part, pmask;
1400 struct raid_softc *rs;
1401 struct dk_softc *dksc;
1402 RF_Config_t *k_cfg;
1403 RF_Raid_t *raidPtr;
1404 RF_AccTotals_t *totals;
1405 RF_SingleComponent_t component;
1406 RF_DeviceConfig_t *d_cfg, *ucfgp;
1407 int retcode = 0;
1408 int column;
1409 RF_ComponentLabel_t *clabel;
1410 RF_SingleComponent_t *sparePtr,*componentPtr;
1411 int d;
1412
1413 if ((rs = raidget(unit, false)) == NULL)
1414 return ENXIO;
1415
1416 dksc = &rs->sc_dksc;
1417 raidPtr = &rs->sc_r;
1418
1419 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1420 (int) DISKPART(dev), (int) unit, cmd));
1421
1422 /* Must be initialized for these... */
1423 if (rf_must_be_initialized(rs, cmd))
1424 return ENXIO;
1425
1426 switch (cmd) {
1427 /* configure the system */
1428 case RAIDFRAME_CONFIGURE:
1429 if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1430 return retcode;
1431 return rf_construct(rs, k_cfg);
1432
1433 /* shutdown the system */
1434 case RAIDFRAME_SHUTDOWN:
1435
1436 part = DISKPART(dev);
1437 pmask = (1 << part);
1438
1439 if ((retcode = raidlock(rs)) != 0)
1440 return retcode;
1441
1442 if (DK_BUSY(dksc, pmask) ||
1443 raidPtr->recon_in_progress != 0 ||
1444 raidPtr->parity_rewrite_in_progress != 0 ||
1445 raidPtr->copyback_in_progress != 0)
1446 retcode = EBUSY;
1447 else {
1448 /* detach and free on close */
1449 rs->sc_flags |= RAIDF_SHUTDOWN;
1450 retcode = 0;
1451 }
1452
1453 raidunlock(rs);
1454
1455 return retcode;
1456 case RAIDFRAME_GET_COMPONENT_LABEL:
1457 return rf_get_component_label(raidPtr, data);
1458
1459 #if RF_DISABLED
1460 case RAIDFRAME_SET_COMPONENT_LABEL:
1461 return rf_set_component_label(raidPtr, data);
1462 #endif
1463
1464 case RAIDFRAME_INIT_LABELS:
1465 return rf_init_component_label(raidPtr, data);
1466
1467 case RAIDFRAME_SET_AUTOCONFIG:
1468 d = rf_set_autoconfig(raidPtr, *(int *) data);
1469 printf("raid%d: New autoconfig value is: %d\n",
1470 raidPtr->raidid, d);
1471 *(int *) data = d;
1472 return retcode;
1473
1474 case RAIDFRAME_SET_ROOT:
1475 d = rf_set_rootpartition(raidPtr, *(int *) data);
1476 printf("raid%d: New rootpartition value is: %d\n",
1477 raidPtr->raidid, d);
1478 *(int *) data = d;
1479 return retcode;
1480
1481 /* initialize all parity */
1482 case RAIDFRAME_REWRITEPARITY:
1483
1484 if (raidPtr->Layout.map->faultsTolerated == 0) {
1485 /* Parity for RAID 0 is trivially correct */
1486 raidPtr->parity_good = RF_RAID_CLEAN;
1487 return 0;
1488 }
1489
1490 if (raidPtr->parity_rewrite_in_progress == 1) {
1491 /* Re-write is already in progress! */
1492 return EINVAL;
1493 }
1494
1495 return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1496 rf_RewriteParityThread, raidPtr,"raid_parity");
1497
1498 case RAIDFRAME_ADD_HOT_SPARE:
1499 sparePtr = (RF_SingleComponent_t *) data;
1500 memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
1501 return rf_add_hot_spare(raidPtr, &component);
1502
1503 case RAIDFRAME_REMOVE_HOT_SPARE:
1504 return retcode;
1505
1506 case RAIDFRAME_DELETE_COMPONENT:
1507 componentPtr = (RF_SingleComponent_t *)data;
1508 memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1509 return rf_delete_component(raidPtr, &component);
1510
1511 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1512 componentPtr = (RF_SingleComponent_t *)data;
1513 memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1514 return rf_incorporate_hot_spare(raidPtr, &component);
1515
1516 case RAIDFRAME_REBUILD_IN_PLACE:
1517 return rf_rebuild_in_place(raidPtr, data);
1518
1519 case RAIDFRAME_GET_INFO:
1520 ucfgp = *(RF_DeviceConfig_t **)data;
1521 d_cfg = RF_Malloc(sizeof(*d_cfg));
1522 if (d_cfg == NULL)
1523 return ENOMEM;
1524 retcode = rf_get_info(raidPtr, d_cfg);
1525 if (retcode == 0) {
1526 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1527 }
1528 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1529 return retcode;
1530
1531 case RAIDFRAME_CHECK_PARITY:
1532 *(int *) data = raidPtr->parity_good;
1533 return 0;
1534
1535 case RAIDFRAME_PARITYMAP_STATUS:
1536 if (rf_paritymap_ineligible(raidPtr))
1537 return EINVAL;
1538 rf_paritymap_status(raidPtr->parity_map, data);
1539 return 0;
1540
1541 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1542 if (rf_paritymap_ineligible(raidPtr))
1543 return EINVAL;
1544 if (raidPtr->parity_map == NULL)
1545 return ENOENT; /* ??? */
1546 if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1547 return EINVAL;
1548 return 0;
1549
1550 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1551 if (rf_paritymap_ineligible(raidPtr))
1552 return EINVAL;
1553 *(int *) data = rf_paritymap_get_disable(raidPtr);
1554 return 0;
1555
1556 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1557 if (rf_paritymap_ineligible(raidPtr))
1558 return EINVAL;
1559 rf_paritymap_set_disable(raidPtr, *(int *)data);
1560 /* XXX should errors be passed up? */
1561 return 0;
1562
1563 case RAIDFRAME_RESET_ACCTOTALS:
1564 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1565 return 0;
1566
1567 case RAIDFRAME_GET_ACCTOTALS:
1568 totals = (RF_AccTotals_t *) data;
1569 *totals = raidPtr->acc_totals;
1570 return 0;
1571
1572 case RAIDFRAME_KEEP_ACCTOTALS:
1573 raidPtr->keep_acc_totals = *(int *)data;
1574 return 0;
1575
1576 case RAIDFRAME_GET_SIZE:
1577 *(int *) data = raidPtr->totalSectors;
1578 return 0;
1579
1580 case RAIDFRAME_FAIL_DISK:
1581 return rf_fail_disk(raidPtr, data);
1582
1583 /* invoke a copyback operation after recon on whatever disk
1584 * needs it, if any */
1585 case RAIDFRAME_COPYBACK:
1586
1587 if (raidPtr->Layout.map->faultsTolerated == 0) {
1588 /* This makes no sense on a RAID 0!! */
1589 return EINVAL;
1590 }
1591
1592 if (raidPtr->copyback_in_progress == 1) {
1593 /* Copyback is already in progress! */
1594 return EINVAL;
1595 }
1596
1597 return RF_CREATE_THREAD(raidPtr->copyback_thread,
1598 rf_CopybackThread, raidPtr, "raid_copyback");
1599
1600 /* return the percentage completion of reconstruction */
1601 case RAIDFRAME_CHECK_RECON_STATUS:
1602 return rf_check_recon_status(raidPtr, data);
1603
1604 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1605 rf_check_recon_status_ext(raidPtr, data);
1606 return 0;
1607
1608 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1609 if (raidPtr->Layout.map->faultsTolerated == 0) {
1610 /* This makes no sense on a RAID 0, so tell the
1611 user it's done. */
1612 *(int *) data = 100;
1613 return 0;
1614 }
1615 if (raidPtr->parity_rewrite_in_progress == 1) {
1616 *(int *) data = 100 *
1617 raidPtr->parity_rewrite_stripes_done /
1618 raidPtr->Layout.numStripe;
1619 } else {
1620 *(int *) data = 100;
1621 }
1622 return 0;
1623
1624 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1625 rf_check_parityrewrite_status_ext(raidPtr, data);
1626 return 0;
1627
1628 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1629 if (raidPtr->Layout.map->faultsTolerated == 0) {
1630 /* This makes no sense on a RAID 0 */
1631 *(int *) data = 100;
1632 return 0;
1633 }
1634 if (raidPtr->copyback_in_progress == 1) {
1635 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1636 raidPtr->Layout.numStripe;
1637 } else {
1638 *(int *) data = 100;
1639 }
1640 return 0;
1641
1642 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1643 rf_check_copyback_status_ext(raidPtr, data);
1644 return 0;
1645
1646 case RAIDFRAME_SET_LAST_UNIT:
1647 for (column = 0; column < raidPtr->numCol; column++)
1648 if (raidPtr->Disks[column].status != rf_ds_optimal)
1649 return EBUSY;
1650
1651 for (column = 0; column < raidPtr->numCol; column++) {
1652 clabel = raidget_component_label(raidPtr, column);
1653 clabel->last_unit = *(int *)data;
1654 raidflush_component_label(raidPtr, column);
1655 }
1656 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1657 return 0;
1658
1659 /* the sparetable daemon calls this to wait for the kernel to
1660 * need a spare table. this ioctl does not return until a
1661 * spare table is needed. XXX -- calling mpsleep here in the
1662 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1663 * -- I should either compute the spare table in the kernel,
1664 * or have a different -- XXX XXX -- interface (a different
1665 * character device) for delivering the table -- XXX */
1666 #if RF_DISABLED
1667 case RAIDFRAME_SPARET_WAIT:
1668 rf_lock_mutex2(rf_sparet_wait_mutex);
1669 while (!rf_sparet_wait_queue)
1670 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1671 RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1672 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1673 rf_unlock_mutex2(rf_sparet_wait_mutex);
1674
1675 /* structure assignment */
1676 *((RF_SparetWait_t *) data) = *waitreq;
1677
1678 RF_Free(waitreq, sizeof(*waitreq));
1679 return 0;
1680
1681 /* wakes up a process waiting on SPARET_WAIT and puts an error
1682 * code in it that will cause the dameon to exit */
1683 case RAIDFRAME_ABORT_SPARET_WAIT:
1684 waitreq = RF_Malloc(sizeof(*waitreq));
1685 waitreq->fcol = -1;
1686 rf_lock_mutex2(rf_sparet_wait_mutex);
1687 waitreq->next = rf_sparet_wait_queue;
1688 rf_sparet_wait_queue = waitreq;
1689 rf_broadcast_cond2(rf_sparet_wait_cv);
1690 rf_unlock_mutex2(rf_sparet_wait_mutex);
1691 return 0;
1692
1693 /* used by the spare table daemon to deliver a spare table
1694 * into the kernel */
1695 case RAIDFRAME_SEND_SPARET:
1696
1697 /* install the spare table */
1698 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1699
1700 /* respond to the requestor. the return status of the spare
1701 * table installation is passed in the "fcol" field */
1702 waitred = RF_Malloc(sizeof(*waitreq));
1703 waitreq->fcol = retcode;
1704 rf_lock_mutex2(rf_sparet_wait_mutex);
1705 waitreq->next = rf_sparet_resp_queue;
1706 rf_sparet_resp_queue = waitreq;
1707 rf_broadcast_cond2(rf_sparet_resp_cv);
1708 rf_unlock_mutex2(rf_sparet_wait_mutex);
1709
1710 return retcode;
1711 #endif
1712 default:
1713 /*
1714 * Don't bother trying to load compat modules
1715 * if it is not our ioctl. This is more efficient
1716 * and makes rump tests not depend on compat code
1717 */
1718 if (IOCGROUP(cmd) != 'r')
1719 break;
1720 #ifdef _LP64
1721 if ((l->l_proc->p_flag & PK_32) != 0) {
1722 module_autoload("compat_netbsd32_raid",
1723 MODULE_CLASS_EXEC);
1724 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1725 (rs, cmd, data), enosys(), retcode);
1726 if (retcode != EPASSTHROUGH)
1727 return retcode;
1728 }
1729 #endif
1730 module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1731 MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1732 (rs, cmd, data), enosys(), retcode);
1733 if (retcode != EPASSTHROUGH)
1734 return retcode;
1735
1736 module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1737 MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1738 (rs, cmd, data), enosys(), retcode);
1739 if (retcode != EPASSTHROUGH)
1740 return retcode;
1741 break; /* fall through to the os-specific code below */
1742
1743 }
1744
1745 if (!raidPtr->valid)
1746 return EINVAL;
1747
1748 /*
1749 * Add support for "regular" device ioctls here.
1750 */
1751
1752 switch (cmd) {
1753 case DIOCGCACHE:
1754 retcode = rf_get_component_caches(raidPtr, (int *)data);
1755 break;
1756
1757 case DIOCCACHESYNC:
1758 retcode = rf_sync_component_caches(raidPtr, *(int *)data);
1759 break;
1760
1761 default:
1762 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1763 break;
1764 }
1765
1766 return retcode;
1767
1768 }
1769
1770
1771 /* raidinit -- complete the rest of the initialization for the
1772 RAIDframe device. */
1773
1774
1775 static void
1776 raidinit(struct raid_softc *rs)
1777 {
1778 cfdata_t cf;
1779 unsigned int unit;
1780 struct dk_softc *dksc = &rs->sc_dksc;
1781 RF_Raid_t *raidPtr = &rs->sc_r;
1782 device_t dev;
1783
1784 unit = raidPtr->raidid;
1785
1786 /* XXX doesn't check bounds. */
1787 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1788
1789 /* attach the pseudo device */
1790 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1791 cf->cf_name = raid_cd.cd_name;
1792 cf->cf_atname = raid_cd.cd_name;
1793 cf->cf_unit = unit;
1794 cf->cf_fstate = FSTATE_STAR;
1795
1796 dev = config_attach_pseudo(cf);
1797 if (dev == NULL) {
1798 printf("raid%d: config_attach_pseudo failed\n",
1799 raidPtr->raidid);
1800 free(cf, M_RAIDFRAME);
1801 return;
1802 }
1803
1804 /* provide a backpointer to the real softc */
1805 raidsoftc(dev) = rs;
1806
1807 /* disk_attach actually creates space for the CPU disklabel, among
1808 * other things, so it's critical to call this *BEFORE* we try putzing
1809 * with disklabels. */
1810 dk_init(dksc, dev, DKTYPE_RAID);
1811 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1812
1813 /* XXX There may be a weird interaction here between this, and
1814 * protectedSectors, as used in RAIDframe. */
1815
1816 rs->sc_size = raidPtr->totalSectors;
1817
1818 /* Attach dk and disk subsystems */
1819 dk_attach(dksc);
1820 disk_attach(&dksc->sc_dkdev);
1821 rf_set_geometry(rs, raidPtr);
1822
1823 bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
1824
1825 /* mark unit as usuable */
1826 rs->sc_flags |= RAIDF_INITED;
1827
1828 dkwedge_discover(&dksc->sc_dkdev);
1829 }
1830
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Queue our request and wake the sparetable daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* Block until the daemon posts a response on the resp queue. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon's status is returned in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
#endif
1865
1866 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1867 * bp & passes it down.
1868 * any calls originating in the kernel must use non-blocking I/O
1869 * do some extra sanity checking to return "appropriate" error values for
1870 * certain conditions (to make some standard utilities work)
1871 *
1872 * Formerly known as: rf_DoAccessKernel
1873 */
1874 void
1875 raidstart(RF_Raid_t *raidPtr)
1876 {
1877 struct raid_softc *rs;
1878 struct dk_softc *dksc;
1879
1880 rs = raidPtr->softc;
1881 dksc = &rs->sc_dksc;
1882 /* quick check to see if anything has died recently */
1883 rf_lock_mutex2(raidPtr->mutex);
1884 if (raidPtr->numNewFailures > 0) {
1885 rf_unlock_mutex2(raidPtr->mutex);
1886 rf_update_component_labels(raidPtr,
1887 RF_NORMAL_COMPONENT_UPDATE);
1888 rf_lock_mutex2(raidPtr->mutex);
1889 raidPtr->numNewFailures--;
1890 }
1891 rf_unlock_mutex2(raidPtr->mutex);
1892
1893 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1894 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1895 return;
1896 }
1897
1898 dk_start(dksc, NULL);
1899 }
1900
/*
 * raiddoaccess: translate a struct buf into an rf_DoAccess call.
 * Returns EAGAIN when no openings are available, ENOSPC when the
 * request falls outside the set (or is not sector-aligned), otherwise
 * whatever rf_DoAccess returns.  Consumes one "opening" on dispatch.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* Back off if all simultaneous-IO slots are in use. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		(int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb accounts for a trailing partial sector, if any. */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	/* sum also catches wrap-around in the additions below. */
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): "1 ||" forces this debug branch on; presumably a
	 * leftover — db1_printf is typically a no-op unless DEBUG. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening; returned when the access completes. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1973
/*
 * rf_DispatchKernelIO: invoke an I/O from kernel mode.  The disk queue
 * mutex must be held on entry; it is dropped and re-taken around
 * bdev_strategy (which can block).  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it
		 * used, and I'd like folks to report it... GO */
		printf("%s: WAKEUP CALLED\n", __func__);
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete immediately via the normal completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp to target the component's device/vnode. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any
		 * pending reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return 0;
}
/*
 * KernelWakeupFunc: biodone-style callback for component I/O issued by
 * rf_DispatchKernelIO.  Records the error, fails the component when the
 * error would not break the whole set, queues the request on the iodone
 * list, and signals the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
			    rf_ds_optimal) ||
			(queue->raidPtr->Disks[queue->col].status ==
			    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
			queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2118
2119
2120 /*
2121 * initialize a buf structure for doing an I/O in the kernel.
2122 */
2123 static void
2124 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2125 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2126 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
2127 {
2128 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
2129 bp->b_oflags = 0;
2130 bp->b_cflags = 0;
2131 bp->b_bcount = numSect << logBytesPerSector;
2132 bp->b_bufsize = bp->b_bcount;
2133 bp->b_error = 0;
2134 bp->b_dev = dev;
2135 bp->b_data = bf;
2136 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2137 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2138 if (bp->b_bcount == 0) {
2139 panic("bp->b_bcount is zero in InitBP!!");
2140 }
2141 bp->b_iodone = cbFunc;
2142 bp->b_private = cbArg;
2143 }
2144
2145 /*
2146 * Wait interruptibly for an exclusive lock.
2147 *
2148 * XXX
2149 * Several drivers do this; it should be abstracted and made MP-safe.
2150 * (Hmm... where have we seen this warning before :-> GO )
2151 */
2152 static int
2153 raidlock(struct raid_softc *rs)
2154 {
2155 int error;
2156
2157 error = 0;
2158 mutex_enter(&rs->sc_mutex);
2159 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2160 rs->sc_flags |= RAIDF_WANTED;
2161 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2162 if (error != 0)
2163 goto done;
2164 }
2165 rs->sc_flags |= RAIDF_LOCKED;
2166 done:
2167 mutex_exit(&rs->sc_mutex);
2168 return error;
2169 }
2170 /*
2171 * Unlock and wake up any waiters.
2172 */
2173 static void
2174 raidunlock(struct raid_softc *rs)
2175 {
2176
2177 mutex_enter(&rs->sc_mutex);
2178 rs->sc_flags &= ~RAIDF_LOCKED;
2179 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2180 rs->sc_flags &= ~RAIDF_WANTED;
2181 cv_broadcast(&rs->sc_cv);
2182 }
2183 mutex_exit(&rs->sc_mutex);
2184 }
2185
2186
2187 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2188 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2189 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2190
2191 static daddr_t
2192 rf_component_info_offset(void)
2193 {
2194
2195 return RF_COMPONENT_INFO_OFFSET;
2196 }
2197
2198 static daddr_t
2199 rf_component_info_size(unsigned secsize)
2200 {
2201 daddr_t info_size;
2202
2203 KASSERT(secsize);
2204 if (secsize > RF_COMPONENT_INFO_SIZE)
2205 info_size = secsize;
2206 else
2207 info_size = RF_COMPONENT_INFO_SIZE;
2208
2209 return info_size;
2210 }
2211
2212 static daddr_t
2213 rf_parity_map_offset(RF_Raid_t *raidPtr)
2214 {
2215 daddr_t map_offset;
2216
2217 KASSERT(raidPtr->bytesPerSector);
2218 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2219 map_offset = raidPtr->bytesPerSector;
2220 else
2221 map_offset = RF_COMPONENT_INFO_SIZE;
2222 map_offset += rf_component_info_offset();
2223
2224 return map_offset;
2225 }
2226
2227 static daddr_t
2228 rf_parity_map_size(RF_Raid_t *raidPtr)
2229 {
2230 daddr_t map_size;
2231
2232 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2233 map_size = raidPtr->bytesPerSector;
2234 else
2235 map_size = RF_PARITY_MAP_SIZE;
2236
2237 return map_size;
2238 }
2239
2240 int
2241 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2242 {
2243 RF_ComponentLabel_t *clabel;
2244
2245 clabel = raidget_component_label(raidPtr, col);
2246 clabel->clean = RF_RAID_CLEAN;
2247 raidflush_component_label(raidPtr, col);
2248 return(0);
2249 }
2250
2251
2252 int
2253 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2254 {
2255 RF_ComponentLabel_t *clabel;
2256
2257 clabel = raidget_component_label(raidPtr, col);
2258 clabel->clean = RF_RAID_DIRTY;
2259 raidflush_component_label(raidPtr, col);
2260 return(0);
2261 }
2262
2263 int
2264 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2265 {
2266 KASSERT(raidPtr->bytesPerSector);
2267
2268 return raidread_component_label(raidPtr->bytesPerSector,
2269 raidPtr->Disks[col].dev,
2270 raidPtr->raid_cinfo[col].ci_vp,
2271 &raidPtr->raid_cinfo[col].ci_label);
2272 }
2273
2274 RF_ComponentLabel_t *
2275 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2276 {
2277 return &raidPtr->raid_cinfo[col].ci_label;
2278 }
2279
2280 int
2281 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2282 {
2283 RF_ComponentLabel_t *label;
2284
2285 label = &raidPtr->raid_cinfo[col].ci_label;
2286 label->mod_counter = raidPtr->mod_counter;
2287 #ifndef RF_NO_PARITY_MAP
2288 label->parity_map_modcount = label->mod_counter;
2289 #endif
2290 return raidwrite_component_label(raidPtr->bytesPerSector,
2291 raidPtr->Disks[col].dev,
2292 raidPtr->raid_cinfo[col].ci_vp, label);
2293 }
2294
2295 /*
2296 * Swap the label endianness.
2297 *
2298 * Everything in the component label is 4-byte-swapped except the version,
2299 * which is kept in the byte-swapped version at all times, and indicates
2300 * for the writer that a swap is necessary.
2301 *
2302 * For reads it is expected that out_label == clabel, but writes expect
2303 * separate labels so only the re-swapped label is written out to disk,
2304 * leaving the swapped-except-version internally.
2305 *
2306 * Only support swapping label version 2.
2307 */
2308 static void
2309 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
2310 {
2311 int *in, *out, *in_last;
2312
2313 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
2314
2315 /* Don't swap the label, but do copy it. */
2316 out_label->version = clabel->version;
2317
2318 in = &clabel->serial_number;
2319 in_last = &clabel->future_use2[42];
2320 out = &out_label->serial_number;
2321
2322 for (; in < in_last; in++, out++)
2323 *out = bswap32(*in);
2324 }
2325
2326 static int
2327 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2328 RF_ComponentLabel_t *clabel)
2329 {
2330 int error;
2331
2332 error = raidread_component_area(dev, b_vp, clabel,
2333 sizeof(RF_ComponentLabel_t),
2334 rf_component_info_offset(),
2335 rf_component_info_size(secsize));
2336
2337 if (error == 0 &&
2338 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2339 rf_swap_label(clabel, clabel);
2340 }
2341
2342 return error;
2343 }
2344
2345 /* ARGSUSED */
2346 static int
2347 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2348 size_t msize, daddr_t offset, daddr_t dsize)
2349 {
2350 struct buf *bp;
2351 int error;
2352
2353 /* XXX should probably ensure that we don't try to do this if
2354 someone has changed rf_protected_sectors. */
2355
2356 if (b_vp == NULL) {
2357 /* For whatever reason, this component is not valid.
2358 Don't try to read a component label from it. */
2359 return(EINVAL);
2360 }
2361
2362 /* get a block of the appropriate size... */
2363 bp = geteblk((int)dsize);
2364 bp->b_dev = dev;
2365
2366 /* get our ducks in a row for the read */
2367 bp->b_blkno = offset / DEV_BSIZE;
2368 bp->b_bcount = dsize;
2369 bp->b_flags |= B_READ;
2370 bp->b_resid = dsize;
2371
2372 bdev_strategy(bp);
2373 error = biowait(bp);
2374
2375 if (!error) {
2376 memcpy(data, bp->b_data, msize);
2377 }
2378
2379 brelse(bp, 0);
2380 return(error);
2381 }
2382
2383 static int
2384 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2385 RF_ComponentLabel_t *clabel)
2386 {
2387 RF_ComponentLabel_t *clabel_write = clabel;
2388 RF_ComponentLabel_t lclabel;
2389 int error;
2390
2391 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
2392 clabel_write = &lclabel;
2393 rf_swap_label(clabel, clabel_write);
2394 }
2395 error = raidwrite_component_area(dev, b_vp, clabel_write,
2396 sizeof(RF_ComponentLabel_t),
2397 rf_component_info_offset(),
2398 rf_component_info_size(secsize), 0);
2399
2400 return error;
2401 }
2402
/*
 * Write `msize' bytes of component metadata at byte offset `offset' of
 * the component, zero-padding the transfer out to `dsize' bytes.  If
 * `asyncp' is nonzero the write is fired off B_ASYNC and this returns 0
 * immediately; otherwise it waits for completion and returns the errno.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* Zero-fill, then copy in the payload (msize <= dsize). */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* Async: no biowait/brelse here -- presumably the buffer
		   is released at biodone time for B_ASYNC, and any write
		   error goes unreported.  NOTE(review): confirm. */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2437
2438 void
2439 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2440 {
2441 int c;
2442
2443 for (c = 0; c < raidPtr->numCol; c++) {
2444 /* Skip dead disks. */
2445 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2446 continue;
2447 /* XXXjld: what if an error occurs here? */
2448 raidwrite_component_area(raidPtr->Disks[c].dev,
2449 raidPtr->raid_cinfo[c].ci_vp, map,
2450 RF_PARITYMAP_NBYTE,
2451 rf_parity_map_offset(raidPtr),
2452 rf_parity_map_size(raidPtr), 0);
2453 }
2454 }
2455
2456 void
2457 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2458 {
2459 struct rf_paritymap_ondisk tmp;
2460 int c,first;
2461
2462 first=1;
2463 for (c = 0; c < raidPtr->numCol; c++) {
2464 /* Skip dead disks. */
2465 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2466 continue;
2467 raidread_component_area(raidPtr->Disks[c].dev,
2468 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2469 RF_PARITYMAP_NBYTE,
2470 rf_parity_map_offset(raidPtr),
2471 rf_parity_map_size(raidPtr));
2472 if (first) {
2473 memcpy(map, &tmp, sizeof(*map));
2474 first = 0;
2475 } else {
2476 rf_paritymap_merge(map, &tmp);
2477 }
2478 }
2479 }
2480
/*
 * Bump the array's modification counter and mark every live component
 * (and every in-use spare) dirty on disk.  Spared components and dead
 * disks are left untouched.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				 but whatever you do, don't
				 try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			   NOTE(review): if no column's spareCol matches,
			   scol keeps its prior value (-1 on the first
			   iteration) -- confirm this cannot happen for a
			   disk in rf_ds_used_spare state. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2540
2541
/*
 * Refresh the component labels of all optimal components (and any
 * in-use spares): bump the modification counter, record the status and
 * the unit we are configured as, and flush each label to disk.  When
 * `final' is RF_FINAL_COMPONENT_UPDATE and the parity is known good,
 * the components are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare replaced.
			   NOTE(review): if no match, scol retains its
			   previous value (-1 initially) -- confirm a
			   used spare always has a matching column. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2619
2620 void
2621 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2622 {
2623
2624 if (vp != NULL) {
2625 if (auto_configured == 1) {
2626 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2627 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2628 vput(vp);
2629
2630 } else {
2631 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2632 }
2633 }
2634 }
2635
2636
2637 void
2638 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2639 {
2640 int r,c;
2641 struct vnode *vp;
2642 int acd;
2643
2644
2645 /* We take this opportunity to close the vnodes like we should.. */
2646
2647 for (c = 0; c < raidPtr->numCol; c++) {
2648 vp = raidPtr->raid_cinfo[c].ci_vp;
2649 acd = raidPtr->Disks[c].auto_configured;
2650 rf_close_component(raidPtr, vp, acd);
2651 raidPtr->raid_cinfo[c].ci_vp = NULL;
2652 raidPtr->Disks[c].auto_configured = 0;
2653 }
2654
2655 for (r = 0; r < raidPtr->numSpare; r++) {
2656 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2657 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2658 rf_close_component(raidPtr, vp, acd);
2659 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2660 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2661 }
2662 }
2663
2664
2665 static void
2666 rf_ReconThread(struct rf_recon_req_internal *req)
2667 {
2668 int s;
2669 RF_Raid_t *raidPtr;
2670
2671 s = splbio();
2672 raidPtr = (RF_Raid_t *) req->raidPtr;
2673 raidPtr->recon_in_progress = 1;
2674
2675 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2676 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2677
2678 RF_Free(req, sizeof(*req));
2679
2680 raidPtr->recon_in_progress = 0;
2681 splx(s);
2682
2683 /* That's all... */
2684 kthread_exit(0); /* does not return */
2685 }
2686
/*
 * Kernel thread body: rewrite all of the array's parity.  On success
 * the in-core parity_good flag is set so the clean bit can be written
 * at shutdown; on failure an error is logged.  Wakes any thread
 * sleeping in shutdown waiting for the rewrite to finish, then exits.
 */
static void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2719
2720
2721 static void
2722 rf_CopybackThread(RF_Raid_t *raidPtr)
2723 {
2724 int s;
2725
2726 raidPtr->copyback_in_progress = 1;
2727 s = splbio();
2728 rf_CopybackReconstructedData(raidPtr);
2729 splx(s);
2730 raidPtr->copyback_in_progress = 0;
2731
2732 /* That's all... */
2733 kthread_exit(0); /* does not return */
2734 }
2735
2736
2737 static void
2738 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2739 {
2740 int s;
2741 RF_Raid_t *raidPtr;
2742
2743 s = splbio();
2744 raidPtr = req->raidPtr;
2745 raidPtr->recon_in_progress = 1;
2746 rf_ReconstructInPlace(raidPtr, req->col);
2747 RF_Free(req, sizeof(*req));
2748 raidPtr->recon_in_progress = 0;
2749 splx(s);
2750
2751 /* That's all... */
2752 kthread_exit(0); /* does not return */
2753 }
2754
2755 static RF_AutoConfig_t *
2756 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2757 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2758 unsigned secsize)
2759 {
2760 int good_one = 0;
2761 RF_ComponentLabel_t *clabel;
2762 RF_AutoConfig_t *ac;
2763
2764 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2765
2766 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2767 /* Got the label. Does it look reasonable? */
2768 if (rf_reasonable_label(clabel, numsecs) &&
2769 (rf_component_label_partitionsize(clabel) <= size)) {
2770 #ifdef DEBUG
2771 printf("Component on: %s: %llu\n",
2772 cname, (unsigned long long)size);
2773 rf_print_component_label(clabel);
2774 #endif
2775 /* if it's reasonable, add it, else ignore it. */
2776 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2777 M_WAITOK);
2778 strlcpy(ac->devname, cname, sizeof(ac->devname));
2779 ac->dev = dev;
2780 ac->vp = vp;
2781 ac->clabel = clabel;
2782 ac->next = ac_list;
2783 ac_list = ac;
2784 good_one = 1;
2785 }
2786 }
2787 if (!good_one) {
2788 /* cleanup */
2789 free(clabel, M_RAIDFRAME);
2790 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2791 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2792 vput(vp);
2793 }
2794 return ac_list;
2795 }
2796
/*
 * Scan every disk device in the system for RAIDframe components and
 * return a list of RF_AutoConfig_t records describing those found.
 * The scan runs twice -- wedges ("dk") first, then everything else --
 * so a wedge covering a whole disk is preferred over the disk's raw
 * partition.
 */
static RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CDs. */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md. */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* we don't care about spiflash */
			if (device_is_a(dv, "spiflash")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			/* NOTE(review): bminor is taken from the device
			   *unit* number; presumably unit == minor for the
			   drivers scanned here -- confirm. */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only wedges dedicated to RAIDframe count */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes over the vnode */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
3008
3009 int
3010 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3011 {
3012
3013 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
3014 clabel->version==RF_COMPONENT_LABEL_VERSION ||
3015 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
3016 (clabel->clean == RF_RAID_CLEAN ||
3017 clabel->clean == RF_RAID_DIRTY) &&
3018 clabel->row >=0 &&
3019 clabel->column >= 0 &&
3020 clabel->num_rows > 0 &&
3021 clabel->num_columns > 0 &&
3022 clabel->row < clabel->num_rows &&
3023 clabel->column < clabel->num_columns &&
3024 clabel->blockSize > 0 &&
3025 /*
3026 * numBlocksHi may contain garbage, but it is ok since
3027 * the type is unsigned. If it is really garbage,
3028 * rf_fix_old_label_size() will fix it.
3029 */
3030 rf_component_label_numblocks(clabel) > 0) {
3031 /*
3032 * label looks reasonable enough...
3033 * let's make sure it has no old garbage.
3034 */
3035 if (numsecs)
3036 rf_fix_old_label_size(clabel, numsecs);
3037 return(1);
3038 }
3039 return(0);
3040 }
3041
3042
3043 /*
3044 * For reasons yet unknown, some old component labels have garbage in
3045 * the newer numBlocksHi region, and this causes lossage. Since those
3046 * disks will also have numsecs set to less than 32 bits of sectors,
3047 * we can determine when this corruption has occurred, and fix it.
3048 *
3049 * The exact same problem, with the same unknown reason, happens to
3050 * the partitionSizeHi member as well.
3051 */
3052 static void
3053 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3054 {
3055
3056 if (numsecs < ((uint64_t)1 << 32)) {
3057 if (clabel->numBlocksHi) {
3058 printf("WARNING: total sectors < 32 bits, yet "
3059 "numBlocksHi set\n"
3060 "WARNING: resetting numBlocksHi to zero.\n");
3061 clabel->numBlocksHi = 0;
3062 }
3063
3064 if (clabel->partitionSizeHi) {
3065 printf("WARNING: total sectors < 32 bits, yet "
3066 "partitionSizeHi set\n"
3067 "WARNING: resetting partitionSizeHi to zero.\n");
3068 clabel->partitionSizeHi = 0;
3069 }
3070 }
3071 }
3072
3073
#ifdef DEBUG
/* Dump the contents of a component label to the console (debug only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* root_partition is masked to 2 bits below; index 3 is a guard. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3107
3108 static RF_ConfigSet_t *
3109 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3110 {
3111 RF_AutoConfig_t *ac;
3112 RF_ConfigSet_t *config_sets;
3113 RF_ConfigSet_t *cset;
3114 RF_AutoConfig_t *ac_next;
3115
3116
3117 config_sets = NULL;
3118
3119 /* Go through the AutoConfig list, and figure out which components
3120 belong to what sets. */
3121 ac = ac_list;
3122 while(ac!=NULL) {
3123 /* we're going to putz with ac->next, so save it here
3124 for use at the end of the loop */
3125 ac_next = ac->next;
3126
3127 if (config_sets == NULL) {
3128 /* will need at least this one... */
3129 config_sets = malloc(sizeof(RF_ConfigSet_t),
3130 M_RAIDFRAME, M_WAITOK);
3131 /* this one is easy :) */
3132 config_sets->ac = ac;
3133 config_sets->next = NULL;
3134 config_sets->rootable = 0;
3135 ac->next = NULL;
3136 } else {
3137 /* which set does this component fit into? */
3138 cset = config_sets;
3139 while(cset!=NULL) {
3140 if (rf_does_it_fit(cset, ac)) {
3141 /* looks like it matches... */
3142 ac->next = cset->ac;
3143 cset->ac = ac;
3144 break;
3145 }
3146 cset = cset->next;
3147 }
3148 if (cset==NULL) {
3149 /* didn't find a match above... new set..*/
3150 cset = malloc(sizeof(RF_ConfigSet_t),
3151 M_RAIDFRAME, M_WAITOK);
3152 cset->ac = ac;
3153 ac->next = NULL;
3154 cset->next = config_sets;
3155 cset->rootable = 0;
3156 config_sets = cset;
3157 }
3158 }
3159 ac = ac_next;
3160 }
3161
3162
3163 return(config_sets);
3164 }
3165
3166 static int
3167 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3168 {
3169 RF_ComponentLabel_t *clabel1, *clabel2;
3170
3171 /* If this one matches the *first* one in the set, that's good
3172 enough, since the other members of the set would have been
3173 through here too... */
3174 /* note that we are not checking partitionSize here..
3175
3176 Note that we are also not checking the mod_counters here.
3177 If everything else matches except the mod_counter, that's
3178 good enough for this test. We will deal with the mod_counters
3179 a little later in the autoconfiguration process.
3180
3181 (clabel1->mod_counter == clabel2->mod_counter) &&
3182
3183 The reason we don't check for this is that failed disks
3184 will have lower modification counts. If those disks are
3185 not added to the set they used to belong to, then they will
3186 form their own set, which may result in 2 different sets,
3187 for example, competing to be configured at raid0, and
3188 perhaps competing to be the root filesystem set. If the
3189 wrong ones get configured, or both attempt to become /,
3190 weird behaviour and or serious lossage will occur. Thus we
3191 need to bring them into the fold here, and kick them out at
3192 a later point.
3193
3194 */
3195
3196 clabel1 = cset->ac->clabel;
3197 clabel2 = ac->clabel;
3198 if ((clabel1->version == clabel2->version) &&
3199 (clabel1->serial_number == clabel2->serial_number) &&
3200 (clabel1->num_rows == clabel2->num_rows) &&
3201 (clabel1->num_columns == clabel2->num_columns) &&
3202 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3203 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3204 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3205 (clabel1->parityConfig == clabel2->parityConfig) &&
3206 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3207 (clabel1->blockSize == clabel2->blockSize) &&
3208 rf_component_label_numblocks(clabel1) ==
3209 rf_component_label_numblocks(clabel2) &&
3210 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3211 (clabel1->root_partition == clabel2->root_partition) &&
3212 (clabel1->last_unit == clabel2->last_unit) &&
3213 (clabel1->config_order == clabel2->config_order)) {
3214 /* if it get's here, it almost *has* to be a match */
3215 } else {
3216 /* it's not consistent with somebody in the set..
3217 punt */
3218 return(0);
3219 }
3220 /* all was fine.. it must fit... */
3221 return(1);
3222 }
3223
/*
 * Decide whether config set `cset' has enough live components to be
 * configured.  A component counts as live if its column is present at
 * the newest mod_counter seen in the set.  Returns 1 if configurable,
 * 0 if too many components are missing or stale for the RAID level.
 */
static int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the largest mod_counter present among the members. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a member occupying column c at the current
		   mod_counter. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) half of a
			   mirror pair without bailing: reset the
			   even_pair_failed flag for the next pair. */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one.
	   (The RAID 1 case was fully handled pairwise above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3326
3327 static void
3328 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3329 RF_Raid_t *raidPtr)
3330 {
3331 RF_ComponentLabel_t *clabel;
3332 int i;
3333
3334 clabel = ac->clabel;
3335
3336 /* 1. Fill in the common stuff */
3337 config->numCol = clabel->num_columns;
3338 config->numSpare = 0; /* XXX should this be set here? */
3339 config->sectPerSU = clabel->sectPerSU;
3340 config->SUsPerPU = clabel->SUsPerPU;
3341 config->SUsPerRU = clabel->SUsPerRU;
3342 config->parityConfig = clabel->parityConfig;
3343 /* XXX... */
3344 strcpy(config->diskQueueType,"fifo");
3345 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3346 config->layoutSpecificSize = 0; /* XXX ?? */
3347
3348 while(ac!=NULL) {
3349 /* row/col values will be in range due to the checks
3350 in reasonable_label() */
3351 strcpy(config->devnames[0][ac->clabel->column],
3352 ac->devname);
3353 ac = ac->next;
3354 }
3355
3356 for(i=0;i<RF_MAXDBGV;i++) {
3357 config->debugVars[i][0] = 0;
3358 }
3359 }
3360
3361 static int
3362 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3363 {
3364 RF_ComponentLabel_t *clabel;
3365 int column;
3366 int sparecol;
3367
3368 raidPtr->autoconfigure = new_value;
3369
3370 for(column=0; column<raidPtr->numCol; column++) {
3371 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3372 clabel = raidget_component_label(raidPtr, column);
3373 clabel->autoconfigure = new_value;
3374 raidflush_component_label(raidPtr, column);
3375 }
3376 }
3377 for(column = 0; column < raidPtr->numSpare ; column++) {
3378 sparecol = raidPtr->numCol + column;
3379 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3380 clabel = raidget_component_label(raidPtr, sparecol);
3381 clabel->autoconfigure = new_value;
3382 raidflush_component_label(raidPtr, sparecol);
3383 }
3384 }
3385 return(new_value);
3386 }
3387
3388 static int
3389 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3390 {
3391 RF_ComponentLabel_t *clabel;
3392 int column;
3393 int sparecol;
3394
3395 raidPtr->root_partition = new_value;
3396 for(column=0; column<raidPtr->numCol; column++) {
3397 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3398 clabel = raidget_component_label(raidPtr, column);
3399 clabel->root_partition = new_value;
3400 raidflush_component_label(raidPtr, column);
3401 }
3402 }
3403 for(column = 0; column < raidPtr->numSpare ; column++) {
3404 sparecol = raidPtr->numCol + column;
3405 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3406 clabel = raidget_component_label(raidPtr, sparecol);
3407 clabel->root_partition = new_value;
3408 raidflush_component_label(raidPtr, sparecol);
3409 }
3410 }
3411 return(new_value);
3412 }
3413
3414 static void
3415 rf_release_all_vps(RF_ConfigSet_t *cset)
3416 {
3417 RF_AutoConfig_t *ac;
3418
3419 ac = cset->ac;
3420 while(ac!=NULL) {
3421 /* Close the vp, and give it back */
3422 if (ac->vp) {
3423 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3424 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3425 vput(ac->vp);
3426 ac->vp = NULL;
3427 }
3428 ac = ac->next;
3429 }
3430 }
3431
3432
3433 static void
3434 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3435 {
3436 RF_AutoConfig_t *ac;
3437 RF_AutoConfig_t *next_ac;
3438
3439 ac = cset->ac;
3440 while(ac!=NULL) {
3441 next_ac = ac->next;
3442 /* nuke the label */
3443 free(ac->clabel, M_RAIDFRAME);
3444 /* cleanup the config structure */
3445 free(ac, M_RAIDFRAME);
3446 /* "next.." */
3447 ac = next_ac;
3448 }
3449 /* and, finally, nuke the config set */
3450 free(cset, M_RAIDFRAME);
3451 }
3452
3453
/*
 * Initialize a component label from the current state of raidPtr:
 * serial/mod counters, set geometry, layout parameters and
 * autoconfiguration settings.  The caller fills in per-component
 * fields (e.g. the column number) and writes the label out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* avoid over-writing byteswapped version. */
	if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Set geometry: rows are always 1 in this implementation. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* Layout parameters, copied from the in-core layout. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3487
/*
 * Auto-configure one config set: allocate an RF_Config_t, find a free
 * raid unit (preferring the unit recorded in the component labels),
 * build the configuration and configure the set.  Returns the softc
 * on success, NULL on failure.
 */
static struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Scan upward from last_unit for a unit that is not in use. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing free unit found: create one at raidID. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* Attach the configured set to the disk(9) layer. */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the unit we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3559
/*
 * Initialize pool "p" for objects of the given size.  The pool name
 * is formatted as "raid%d_<pool_name>" into w_chan, which must be at
 * least RF_MAX_POOLNAMELEN bytes and must stay valid for the pool's
 * lifetime.  xmax is installed as the high-water mark and xmin items
 * are pre-allocated.
 */
void
rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
    size_t xmin, size_t xmax)
{

	/* Format: raid%d_foo */
	snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
}
3572
3573
3574 /*
3575 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3576 * to see if there is IO pending and if that IO could possibly be done
3577 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3578 * otherwise.
3579 *
3580 */
3581 int
3582 rf_buf_queue_check(RF_Raid_t *raidPtr)
3583 {
3584 struct raid_softc *rs;
3585 struct dk_softc *dksc;
3586
3587 rs = raidPtr->softc;
3588 dksc = &rs->sc_dksc;
3589
3590 if ((rs->sc_flags & RAIDF_INITED) == 0)
3591 return 1;
3592
3593 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3594 /* there is work to do */
3595 return 0;
3596 }
3597 /* default is nothing to do */
3598 return 1;
3599 }
3600
3601 int
3602 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3603 {
3604 uint64_t numsecs;
3605 unsigned secsize;
3606 int error;
3607
3608 error = getdisksize(vp, &numsecs, &secsize);
3609 if (error == 0) {
3610 diskPtr->blockSize = secsize;
3611 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3612 diskPtr->partitionSize = numsecs;
3613 return 0;
3614 }
3615 return error;
3616 }
3617
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3623
/*
 * Autoconf attach function: intentionally empty; the real setup
 * happens when a set is configured (see rf_auto_config_set() and the
 * configuration ioctl paths).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3628
3629
3630 static int
3631 raid_detach(device_t self, int flags)
3632 {
3633 int error;
3634 struct raid_softc *rs = raidsoftc(self);
3635
3636 if (rs == NULL)
3637 return ENXIO;
3638
3639 if ((error = raidlock(rs)) != 0)
3640 return error;
3641
3642 error = raid_detach_unlocked(rs);
3643
3644 raidunlock(rs);
3645
3646 /* XXX raid can be referenced here */
3647
3648 if (error)
3649 return error;
3650
3651 /* Free the softc */
3652 raidput(rs);
3653
3654 return 0;
3655 }
3656
/*
 * Publish a synthetic disk geometry for the RAID set to the disk(9)
 * layer.  Only the total size and sector size reflect reality; the
 * sectors/tracks values are fabricated from the layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* NOTE(review): 4 tracks per column looks arbitrary — confirm. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3672
3673 /*
3674 * Get cache info for all the components (including spares).
3675 * Returns intersection of all the cache flags of all disks, or first
3676 * error if any encountered.
3677 * XXXfua feature flags can change as spares are added - lock down somehow
3678 */
3679 static int
3680 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3681 {
3682 int c;
3683 int error;
3684 int dkwhole = 0, dkpart;
3685
3686 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3687 /*
3688 * Check any non-dead disk, even when currently being
3689 * reconstructed.
3690 */
3691 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3692 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3693 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3694 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3695 if (error) {
3696 if (error != ENODEV) {
3697 printf("raid%d: get cache for component %s failed\n",
3698 raidPtr->raidid,
3699 raidPtr->Disks[c].devname);
3700 }
3701
3702 return error;
3703 }
3704
3705 if (c == 0)
3706 dkwhole = dkpart;
3707 else
3708 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3709 }
3710 }
3711
3712 *data = dkwhole;
3713
3714 return 0;
3715 }
3716
3717 /*
3718 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3719 * We end up returning whatever error was returned by the first cache flush
3720 * that fails.
3721 */
3722
3723 static int
3724 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
3725 {
3726 int e = 0;
3727 for (int i = 0; i < 5; i++) {
3728 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3729 &force, FWRITE, NOCRED);
3730 if (!e || e == ENODEV)
3731 return e;
3732 printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3733 raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3734 }
3735 return e;
3736 }
3737
3738 int
3739 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
3740 {
3741 int c, error;
3742
3743 error = 0;
3744 for (c = 0; c < raidPtr->numCol; c++) {
3745 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3746 int e = rf_sync_component_cache(raidPtr, c, force);
3747 if (e && !error)
3748 error = e;
3749 }
3750 }
3751
3752 for (c = 0; c < raidPtr->numSpare ; c++) {
3753 int sparecol = raidPtr->numCol + c;
3754 /* Need to ensure that the reconstruct actually completed! */
3755 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3756 int e = rf_sync_component_cache(raidPtr, sparecol,
3757 force);
3758 if (e && !error)
3759 error = e;
3760 }
3761 }
3762 return error;
3763 }
3764
3765 /* Fill in info with the current status */
3766 void
3767 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3768 {
3769
3770 if (raidPtr->status != rf_rs_reconstructing) {
3771 info->total = 100;
3772 info->completed = 100;
3773 } else {
3774 info->total = raidPtr->reconControl->numRUsTotal;
3775 info->completed = raidPtr->reconControl->numRUsComplete;
3776 }
3777 info->remaining = info->total - info->completed;
3778 }
3779
3780 /* Fill in info with the current status */
3781 void
3782 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3783 {
3784
3785 if (raidPtr->parity_rewrite_in_progress == 1) {
3786 info->total = raidPtr->Layout.numStripe;
3787 info->completed = raidPtr->parity_rewrite_stripes_done;
3788 } else {
3789 info->completed = 100;
3790 info->total = 100;
3791 }
3792 info->remaining = info->total - info->completed;
3793 }
3794
3795 /* Fill in info with the current status */
3796 void
3797 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3798 {
3799
3800 if (raidPtr->copyback_in_progress == 1) {
3801 info->total = raidPtr->Layout.numStripe;
3802 info->completed = raidPtr->copyback_stripes_done;
3803 info->remaining = info->total - info->completed;
3804 } else {
3805 info->remaining = 0;
3806 info->completed = 100;
3807 info->total = 100;
3808 }
3809 }
3810
3811 /* Fill in config with the current info */
3812 int
3813 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3814 {
3815 int d, i, j;
3816
3817 if (!raidPtr->valid)
3818 return ENODEV;
3819 config->cols = raidPtr->numCol;
3820 config->ndevs = raidPtr->numCol;
3821 if (config->ndevs >= RF_MAX_DISKS)
3822 return ENOMEM;
3823 config->nspares = raidPtr->numSpare;
3824 if (config->nspares >= RF_MAX_DISKS)
3825 return ENOMEM;
3826 config->maxqdepth = raidPtr->maxQueueDepth;
3827 d = 0;
3828 for (j = 0; j < config->cols; j++) {
3829 config->devs[d] = raidPtr->Disks[j];
3830 d++;
3831 }
3832 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3833 config->spares[i] = raidPtr->Disks[j];
3834 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3835 /* XXX: raidctl(8) expects to see this as a used spare */
3836 config->spares[i].status = rf_ds_used_spare;
3837 }
3838 }
3839 return 0;
3840 }
3841
3842 int
3843 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3844 {
3845 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3846 RF_ComponentLabel_t *raid_clabel;
3847 int column = clabel->column;
3848
3849 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3850 return EINVAL;
3851 raid_clabel = raidget_component_label(raidPtr, column);
3852 memcpy(clabel, raid_clabel, sizeof *clabel);
3853 /* Fix-up for userland. */
3854 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
3855 clabel->version = RF_COMPONENT_LABEL_VERSION;
3856
3857 return 0;
3858 }
3859
3860 /*
3861 * Module interface
3862 */
3863
/* Module declaration; depends on the dk subroutines and FCFS bufq. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* Loadable-module build: declare the "raid" autoconf driver here. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

/* Module control entry points. */
static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3873
3874 static int
3875 raid_modcmd(modcmd_t cmd, void *data)
3876 {
3877 int error;
3878
3879 error = 0;
3880 switch (cmd) {
3881 case MODULE_CMD_INIT:
3882 error = raid_modcmd_init();
3883 break;
3884 case MODULE_CMD_FINI:
3885 error = raid_modcmd_fini();
3886 break;
3887 default:
3888 error = ENOTTY;
3889 break;
3890 }
3891 return error;
3892 }
3893
/*
 * Module initialization: attach the raid device switch and autoconf
 * glue, boot the RAIDframe engine, and register a finalizer that
 * auto-configures RAID sets once all hardware has been found.  Each
 * failure path unwinds the attachments made before it.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets the kernel choose the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST (already attached, e.g. built-in) is not fatal. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Unwind the devsw attachment. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind in reverse order of attachment. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is always 0 here: every failure above returned early. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: auto-configuration simply won't run. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3964
/*
 * Module teardown: refuse to unload while any raid device exists,
 * then detach the autoconf glue and device switch in reverse order of
 * attachment, re-attaching the pieces already detached if a later
 * step fails so the module stays consistent.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back the cfattach detach. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back both earlier detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut the RAIDframe engine down. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4014