rf_netbsdkintf.c revision 1.358 1 /* $NetBSD: rf_netbsdkintf.c,v 1.358 2019/01/27 02:08:42 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.358 2019/01/27 02:08:42 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131 #include <sys/compat_stub.h>
132
133 #include <prop/proplib.h>
134
135 #include <dev/raidframe/raidframevar.h>
136 #include <dev/raidframe/raidframeio.h>
137 #include <dev/raidframe/rf_paritymap.h>
138
139 #include "rf_raid.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_desc.h"
144 #include "rf_diskqueue.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_threadstuff.h"
152
153 #include "rf_compat50.h"
154
155 #include "rf_compat80.h"
156
157 #ifdef COMPAT_NETBSD32
158 #include "rf_compat32.h"
159 #endif
160
161 #include "ioconf.h"
162
/*
 * db1_printf() emits driver-level debug output, but only when the
 * kernel is built with DEBUG and rf_kdebug_level is raised above 0
 * at run time (e.g. from ddb); otherwise it compiles to nothing.
 */
#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

/* DPRINTF() traces the root-device selection logic (DEBUG_ROOT builds). */
#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Handshake state for installing distributed-sparing tables: a user
 * process dequeues requests from the wait queue and posts results on
 * the response queue, synchronized by the mutex/condvars below.
 */
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

/* malloc(9) tag for all RAIDframe allocations made via M_RAIDFRAME. */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
188
/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

/* autoconf(9) glue for the raid pseudo-device. */
static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

/* Raw reads/writes of the reserved component area on a member disk. */
static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

/* Component-label readers/writers built on the area helpers above. */
static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

/* dk(9) driver callbacks (see rf_dkdriver below). */
static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

/* Block/character device switch entry points. */
static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);
225
/* Block-device switch entries for /dev/raidN[a-...]. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character-device switch entries for /dev/rraidN[a-...]. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* dk(9) driver glue: hooks the generic disk framework into this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
261
/*
 * Per-unit software state.  One of these exists for every configured
 * (or in-configuration) RAID set; instances live on the global
 * "raids" list, protected by raid_lock.
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk state (must be first) */
	int     sc_unit;		/* raidN unit number */
	int     sc_flags;		/* flags */
	int     sc_cflags;		/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
/* sc_flags */
#define RAIDF_INITED		0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
#define RAIDF_DETACH		0x04	/* detach after final close */
#define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED		0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */

#define	raidunit(x)	DISKUNIT(x)
/* Map a device_t back to its raid_softc via the RF_Raid_t back-pointer. */
#define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column to reconstruct/fail */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void   *raidPtr;		/* RF_Raid_t the request applies to */
};
296
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw partition of the component's parent device, used for labels. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

/* Kernel-thread entry points for long-running maintenance operations. */
void rf_ReconThread(struct rf_recon_req_internal *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

/* Autoconfiguration helpers: discovery, grouping, and validation. */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

/* All configured units, and the lock protecting list membership. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;
367
368 static struct raid_softc *
369 raidcreate(int unit) {
370 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
371 sc->sc_unit = unit;
372 cv_init(&sc->sc_cv, "raidunit");
373 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
374 return sc;
375 }
376
/* Release the synchronization primitives and free a softc. */
static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}
383
384 static struct raid_softc *
385 raidget(int unit, bool create) {
386 struct raid_softc *sc;
387 if (unit < 0) {
388 #ifdef DIAGNOSTIC
389 panic("%s: unit %d!", __func__, unit);
390 #endif
391 return NULL;
392 }
393 mutex_enter(&raid_lock);
394 LIST_FOREACH(sc, &raids, sc_link) {
395 if (sc->sc_unit == unit) {
396 mutex_exit(&raid_lock);
397 return sc;
398 }
399 }
400 mutex_exit(&raid_lock);
401 if (!create)
402 return NULL;
403 if ((sc = raidcreate(unit)) == NULL)
404 return NULL;
405 mutex_enter(&raid_lock);
406 LIST_INSERT_HEAD(&raids, sc, sc_link);
407 mutex_exit(&raid_lock);
408 return sc;
409 }
410
/* Unlink a softc from the global list and free it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
418
/*
 * Legacy pseudo-device attach hook; intentionally empty.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
428
429 int
430 rf_autoconfig(device_t self)
431 {
432 RF_AutoConfig_t *ac_list;
433 RF_ConfigSet_t *config_sets;
434
435 if (!raidautoconfig || raidautoconfigdone == true)
436 return (0);
437
438 /* XXX This code can only be run once. */
439 raidautoconfigdone = true;
440
441 #ifdef __HAVE_CPU_BOOTCONF
442 /*
443 * 0. find the boot device if needed first so we can use it later
444 * this needs to be done before we autoconfigure any raid sets,
445 * because if we use wedges we are not going to be able to open
446 * the boot device later
447 */
448 if (booted_device == NULL)
449 cpu_bootconf();
450 #endif
451 /* 1. locate all RAID components on the system */
452 aprint_debug("Searching for RAID components...\n");
453 ac_list = rf_find_raid_components();
454
455 /* 2. Sort them into their respective sets. */
456 config_sets = rf_create_auto_sets(ac_list);
457
458 /*
459 * 3. Evaluate each set and configure the valid ones.
460 * This gets done in rf_buildroothack().
461 */
462 rf_buildroothack(config_sets);
463
464 return 1;
465 }
466
467 static int
468 rf_containsboot(RF_Raid_t *r, device_t bdv) {
469 const char *bootname = device_xname(bdv);
470 size_t len = strlen(bootname);
471
472 for (int col = 0; col < r->numCol; col++) {
473 const char *devname = r->Disks[col].devname;
474 devname += sizeof("/dev/") - 1;
475 if (strncmp(devname, "dk", 2) == 0) {
476 const char *parent =
477 dkwedge_get_parent_name(r->Disks[col].dev);
478 if (parent != NULL)
479 devname = parent;
480 }
481 if (strncmp(devname, bootname, len) == 0) {
482 struct raid_softc *sc = r->softc;
483 aprint_debug("raid%d includes boot device %s\n",
484 sc->sc_unit, devname);
485 return 1;
486 }
487 }
488 return 0;
489 }
490
491 void
492 rf_buildroothack(RF_ConfigSet_t *config_sets)
493 {
494 RF_ConfigSet_t *cset;
495 RF_ConfigSet_t *next_cset;
496 int num_root;
497 struct raid_softc *sc, *rsc;
498 struct dk_softc *dksc;
499
500 sc = rsc = NULL;
501 num_root = 0;
502 cset = config_sets;
503 while (cset != NULL) {
504 next_cset = cset->next;
505 if (rf_have_enough_components(cset) &&
506 cset->ac->clabel->autoconfigure == 1) {
507 sc = rf_auto_config_set(cset);
508 if (sc != NULL) {
509 aprint_debug("raid%d: configured ok\n",
510 sc->sc_unit);
511 if (cset->rootable) {
512 rsc = sc;
513 num_root++;
514 }
515 } else {
516 /* The autoconfig didn't work :( */
517 aprint_debug("Autoconfig failed\n");
518 rf_release_all_vps(cset);
519 }
520 } else {
521 /* we're not autoconfiguring this set...
522 release the associated resources */
523 rf_release_all_vps(cset);
524 }
525 /* cleanup */
526 rf_cleanup_config_set(cset);
527 cset = next_cset;
528 }
529 dksc = &rsc->sc_dksc;
530
531 /* if the user has specified what the root device should be
532 then we don't touch booted_device or boothowto... */
533
534 if (rootspec != NULL)
535 return;
536
537 /* we found something bootable... */
538
539 /*
540 * XXX: The following code assumes that the root raid
541 * is the first ('a') partition. This is about the best
542 * we can do with a BSD disklabel, but we might be able
543 * to do better with a GPT label, by setting a specified
544 * attribute to indicate the root partition. We can then
545 * stash the partition number in the r->root_partition
546 * high bits (the bottom 2 bits are already used). For
547 * now we just set booted_partition to 0 when we override
548 * root.
549 */
550 if (num_root == 1) {
551 device_t candidate_root;
552 if (dksc->sc_dkdev.dk_nwedges != 0) {
553 char cname[sizeof(cset->ac->devname)];
554 /* XXX: assume partition 'a' first */
555 snprintf(cname, sizeof(cname), "%s%c",
556 device_xname(dksc->sc_dev), 'a');
557 candidate_root = dkwedge_find_by_wname(cname);
558 DPRINTF("%s: candidate wedge root=%s\n", __func__,
559 cname);
560 if (candidate_root == NULL) {
561 /*
562 * If that is not found, because we don't use
563 * disklabel, return the first dk child
564 * XXX: we can skip the 'a' check above
565 * and always do this...
566 */
567 size_t i = 0;
568 candidate_root = dkwedge_find_by_parent(
569 device_xname(dksc->sc_dev), &i);
570 }
571 DPRINTF("%s: candidate wedge root=%p\n", __func__,
572 candidate_root);
573 } else
574 candidate_root = dksc->sc_dev;
575 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
576 DPRINTF("%s: booted_device=%p root_partition=%d "
577 "contains_boot=%d\n", __func__, booted_device,
578 rsc->sc_r.root_partition,
579 rf_containsboot(&rsc->sc_r, booted_device));
580 if (booted_device == NULL ||
581 rsc->sc_r.root_partition == 1 ||
582 rf_containsboot(&rsc->sc_r, booted_device)) {
583 booted_device = candidate_root;
584 booted_method = "raidframe/single";
585 booted_partition = 0; /* XXX assume 'a' */
586 }
587 } else if (num_root > 1) {
588 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
589 booted_device);
590
591 /*
592 * Maybe the MD code can help. If it cannot, then
593 * setroot() will discover that we have no
594 * booted_device and will ask the user if nothing was
595 * hardwired in the kernel config file
596 */
597 if (booted_device == NULL)
598 return;
599
600 num_root = 0;
601 mutex_enter(&raid_lock);
602 LIST_FOREACH(sc, &raids, sc_link) {
603 RF_Raid_t *r = &sc->sc_r;
604 if (r->valid == 0)
605 continue;
606
607 if (r->root_partition == 0)
608 continue;
609
610 if (rf_containsboot(r, booted_device)) {
611 num_root++;
612 rsc = sc;
613 dksc = &rsc->sc_dksc;
614 }
615 }
616 mutex_exit(&raid_lock);
617
618 if (num_root == 1) {
619 booted_device = dksc->sc_dev;
620 booted_method = "raidframe/multi";
621 booted_partition = 0; /* XXX assume 'a' */
622 } else {
623 /* we can't guess.. require the user to answer... */
624 boothowto |= RB_ASKNAME;
625 }
626 }
627 }
628
629 static int
630 raidsize(dev_t dev)
631 {
632 struct raid_softc *rs;
633 struct dk_softc *dksc;
634 unsigned int unit;
635
636 unit = raidunit(dev);
637 if ((rs = raidget(unit, false)) == NULL)
638 return -1;
639 dksc = &rs->sc_dksc;
640
641 if ((rs->sc_flags & RAIDF_INITED) == 0)
642 return -1;
643
644 return dk_size(dksc, dev);
645 }
646
647 static int
648 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
649 {
650 unsigned int unit;
651 struct raid_softc *rs;
652 struct dk_softc *dksc;
653
654 unit = raidunit(dev);
655 if ((rs = raidget(unit, false)) == NULL)
656 return ENXIO;
657 dksc = &rs->sc_dksc;
658
659 if ((rs->sc_flags & RAIDF_INITED) == 0)
660 return ENODEV;
661
662 /*
663 Note that blkno is relative to this particular partition.
664 By adding adding RF_PROTECTED_SECTORS, we get a value that
665 is relative to the partition used for the underlying component.
666 */
667 blkno += RF_PROTECTED_SECTORS;
668
669 return dk_dump(dksc, dev, blkno, va, size);
670 }
671
/*
 * dk(9) dumpblocks callback: write nblk blocks at blkno directly to a
 * single live component of a RAID 1 set, bypassing the normal I/O
 * path (we are crashing, so no interrupts or threads are available).
 * Returns EINVAL if the set is not RAID 1 or no live component exists.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Write straight through the component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
777
778 /* ARGSUSED */
779 static int
780 raidopen(dev_t dev, int flags, int fmt,
781 struct lwp *l)
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785 struct dk_softc *dksc;
786 int error = 0;
787 int part, pmask;
788
789 if ((rs = raidget(unit, true)) == NULL)
790 return ENXIO;
791 if ((error = raidlock(rs)) != 0)
792 return (error);
793
794 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
795 error = EBUSY;
796 goto bad;
797 }
798
799 dksc = &rs->sc_dksc;
800
801 part = DISKPART(dev);
802 pmask = (1 << part);
803
804 if (!DK_BUSY(dksc, pmask) &&
805 ((rs->sc_flags & RAIDF_INITED) != 0)) {
806 /* First one... mark things as dirty... Note that we *MUST*
807 have done a configure before this. I DO NOT WANT TO BE
808 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
809 THAT THEY BELONG TOGETHER!!!!! */
810 /* XXX should check to see if we're only open for reading
811 here... If so, we needn't do this, but then need some
812 other way of keeping track of what's happened.. */
813
814 rf_markalldirty(&rs->sc_r);
815 }
816
817 if ((rs->sc_flags & RAIDF_INITED) != 0)
818 error = dk_open(dksc, dev, flags, fmt, l);
819
820 bad:
821 raidunlock(rs);
822
823 return (error);
824
825
826 }
827
828 static int
829 raid_lastclose(device_t self)
830 {
831 struct raid_softc *rs = raidsoftc(self);
832
833 /* Last one... device is not unconfigured yet.
834 Device shutdown has taken care of setting the
835 clean bits if RAIDF_INITED is not set
836 mark things as clean... */
837
838 rf_update_component_labels(&rs->sc_r,
839 RF_FINAL_COMPONENT_UPDATE);
840
841 /* pass to unlocked code */
842 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
843 rs->sc_flags |= RAIDF_DETACH;
844
845 return 0;
846 }
847
848 /* ARGSUSED */
849 static int
850 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
851 {
852 int unit = raidunit(dev);
853 struct raid_softc *rs;
854 struct dk_softc *dksc;
855 cfdata_t cf;
856 int error = 0, do_detach = 0, do_put = 0;
857
858 if ((rs = raidget(unit, false)) == NULL)
859 return ENXIO;
860 dksc = &rs->sc_dksc;
861
862 if ((error = raidlock(rs)) != 0)
863 return (error);
864
865 if ((rs->sc_flags & RAIDF_INITED) != 0) {
866 error = dk_close(dksc, dev, flags, fmt, l);
867 if ((rs->sc_flags & RAIDF_DETACH) != 0)
868 do_detach = 1;
869 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
870 do_put = 1;
871
872 raidunlock(rs);
873
874 if (do_detach) {
875 /* free the pseudo device attach bits */
876 cf = device_cfdata(dksc->sc_dev);
877 error = config_detach(dksc->sc_dev, 0);
878 if (error == 0)
879 free(cf, M_RAIDFRAME);
880 } else if (do_put) {
881 raidput(rs);
882 }
883
884 return (error);
885
886 }
887
/* Wake the per-array I/O thread so queued work gets serviced. */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
895
896 static void
897 raidstrategy(struct buf *bp)
898 {
899 unsigned int unit;
900 struct raid_softc *rs;
901 struct dk_softc *dksc;
902 RF_Raid_t *raidPtr;
903
904 unit = raidunit(bp->b_dev);
905 if ((rs = raidget(unit, false)) == NULL) {
906 bp->b_error = ENXIO;
907 goto fail;
908 }
909 if ((rs->sc_flags & RAIDF_INITED) == 0) {
910 bp->b_error = ENXIO;
911 goto fail;
912 }
913 dksc = &rs->sc_dksc;
914 raidPtr = &rs->sc_r;
915
916 /* Queue IO only */
917 if (dk_strategy_defer(dksc, bp))
918 goto done;
919
920 /* schedule the IO to happen at the next convenient time */
921 raid_wakeup(raidPtr);
922
923 done:
924 return;
925
926 fail:
927 bp->b_resid = bp->b_bcount;
928 biodone(bp);
929 }
930
931 static int
932 raid_diskstart(device_t dev, struct buf *bp)
933 {
934 struct raid_softc *rs = raidsoftc(dev);
935 RF_Raid_t *raidPtr;
936
937 raidPtr = &rs->sc_r;
938 if (!raidPtr->valid) {
939 db1_printf(("raid is not valid..\n"));
940 return ENODEV;
941 }
942
943 /* XXX */
944 bp->b_resid = 0;
945
946 return raiddoaccess(raidPtr, bp);
947 }
948
949 void
950 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
951 {
952 struct raid_softc *rs;
953 struct dk_softc *dksc;
954
955 rs = raidPtr->softc;
956 dksc = &rs->sc_dksc;
957
958 dk_done(dksc, bp);
959
960 rf_lock_mutex2(raidPtr->mutex);
961 raidPtr->openings++;
962 rf_unlock_mutex2(raidPtr->mutex);
963
964 /* schedule more IO */
965 raid_wakeup(raidPtr);
966 }
967
968 /* ARGSUSED */
969 static int
970 raidread(dev_t dev, struct uio *uio, int flags)
971 {
972 int unit = raidunit(dev);
973 struct raid_softc *rs;
974
975 if ((rs = raidget(unit, false)) == NULL)
976 return ENXIO;
977
978 if ((rs->sc_flags & RAIDF_INITED) == 0)
979 return (ENXIO);
980
981 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
982
983 }
984
985 /* ARGSUSED */
986 static int
987 raidwrite(dev_t dev, struct uio *uio, int flags)
988 {
989 int unit = raidunit(dev);
990 struct raid_softc *rs;
991
992 if ((rs = raidget(unit, false)) == NULL)
993 return ENXIO;
994
995 if ((rs->sc_flags & RAIDF_INITED) == 0)
996 return (ENXIO);
997
998 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
999
1000 }
1001
/*
 * Tear down a configured unit: shut down RAIDframe, drain and free
 * the buffer queue, and detach the disk(9) structures.  The caller
 * holds the unit lock.  Returns EBUSY while the unit is open or a
 * reconstruction/parity-rewrite/copyback is still running.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/*
	 * NOTE(review): RAIDF_SHUTDOWN is cleared before rf_Shutdown(),
	 * so a failed shutdown leaves the unit no longer marked as
	 * shutting down — confirm this is the intended recovery state.
	 */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1039
1040 static int
1041 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1042 {
1043 int unit = raidunit(dev);
1044 int error = 0;
1045 int part, pmask;
1046 struct raid_softc *rs;
1047 struct dk_softc *dksc;
1048 RF_Config_t *k_cfg, *u_cfg;
1049 RF_Raid_t *raidPtr;
1050 RF_RaidDisk_t *diskPtr;
1051 RF_AccTotals_t *totals;
1052 RF_DeviceConfig_t *d_cfg, *ucfgp;
1053 u_char *specific_buf;
1054 int retcode = 0;
1055 int column;
1056 /* int raidid; */
1057 struct rf_recon_req *rr;
1058 struct rf_recon_req_internal *rrint;
1059 RF_ComponentLabel_t *clabel;
1060 RF_ComponentLabel_t *ci_label;
1061 RF_SingleComponent_t *sparePtr,*componentPtr;
1062 RF_SingleComponent_t component;
1063 int d;
1064
1065 if ((rs = raidget(unit, false)) == NULL)
1066 return ENXIO;
1067 dksc = &rs->sc_dksc;
1068 raidPtr = &rs->sc_r;
1069
1070 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1071 (int) DISKPART(dev), (int) unit, cmd));
1072
1073 /* Must be initialized for these... */
1074 switch (cmd) {
1075 case RAIDFRAME_REWRITEPARITY:
1076 case RAIDFRAME_GET_INFO:
1077 case RAIDFRAME_RESET_ACCTOTALS:
1078 case RAIDFRAME_GET_ACCTOTALS:
1079 case RAIDFRAME_KEEP_ACCTOTALS:
1080 case RAIDFRAME_GET_SIZE:
1081 case RAIDFRAME_FAIL_DISK:
1082 case RAIDFRAME_COPYBACK:
1083 case RAIDFRAME_CHECK_RECON_STATUS:
1084 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1085 case RAIDFRAME_GET_COMPONENT_LABEL:
1086 case RAIDFRAME_SET_COMPONENT_LABEL:
1087 case RAIDFRAME_ADD_HOT_SPARE:
1088 case RAIDFRAME_REMOVE_HOT_SPARE:
1089 case RAIDFRAME_INIT_LABELS:
1090 case RAIDFRAME_REBUILD_IN_PLACE:
1091 case RAIDFRAME_CHECK_PARITY:
1092 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1093 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1094 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1095 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1096 case RAIDFRAME_SET_AUTOCONFIG:
1097 case RAIDFRAME_SET_ROOT:
1098 case RAIDFRAME_DELETE_COMPONENT:
1099 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1100 case RAIDFRAME_PARITYMAP_STATUS:
1101 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1102 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1103 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1104 #ifdef COMPAT_NETBSD32
1105 #ifdef _LP64
1106 case RAIDFRAME_GET_INFO32:
1107 #endif
1108 #endif
1109 if ((rs->sc_flags & RAIDF_INITED) == 0)
1110 return (ENXIO);
1111 }
1112
1113 /*
1114 * Handle compat ioctl calls
1115 *
1116 * * If compat code is not loaded, stub returns ENOSYS and we just
1117 * check the "native" cmd's
1118 * * If compat code is loaded but does not recognize the cmd, it
1119 * returns EPASSTHROUGH, and we just check the "native" cmd's
1120 * * If compat code returns EAGAIN, we need to finish via config
1121 * * Otherwise the cmd has been handled and we just return
1122 */
1123 MODULE_CALL_HOOK(raidframe50_ioctl_hook,
1124 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1125 enosys(), retcode);
1126 if (retcode == ENOSYS)
1127 retcode = 0;
1128 else if (retcode == EAGAIN)
1129 goto config;
1130 else if (retcode != EPASSTHROUGH)
1131 return retcode;
1132
1133 MODULE_CALL_HOOK(raidframe80_ioctl_hook,
1134 (cmd, (rs->sc_flags & RAIDF_INITED),raidPtr, unit, data, &k_cfg),
1135 enosys(), retcode);
1136 if (retcode == ENOSYS)
1137 retcode = 0;
1138 else if (retcode == EAGAIN)
1139 goto config;
1140 else if (retcode != EPASSTHROUGH)
1141 return retcode;
1142
1143 /*
1144 * XXX
1145 * Handling of FAIL_DISK80 command requires us to retain retcode's
1146 * value of EPASSTHROUGH. If you add more compat code later, make
1147 * sure you don't overwrite retcode and break this!
1148 */
1149
1150 switch (cmd) {
1151
1152 /* configure the system */
1153 case RAIDFRAME_CONFIGURE:
1154 #ifdef COMPAT_NETBSD32
1155 #ifdef _LP64
1156 case RAIDFRAME_CONFIGURE32:
1157 #endif
1158 #endif
1159
1160 if (raidPtr->valid) {
1161 /* There is a valid RAID set running on this unit! */
1162 printf("raid%d: Device already configured!\n",unit);
1163 return(EINVAL);
1164 }
1165
1166 /* copy-in the configuration information */
1167 /* data points to a pointer to the configuration structure */
1168
1169 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1170 if (k_cfg == NULL) {
1171 return (ENOMEM);
1172 }
1173 #ifdef COMPAT_NETBSD32
1174 #ifdef _LP64
1175 if (cmd == RAIDFRAME_CONFIGURE32 &&
1176 (l->l_proc->p_flag & PK_32) != 0)
1177 retcode = rf_config_netbsd32(data, k_cfg);
1178 else
1179 #endif
1180 #endif
1181 {
1182 u_cfg = *((RF_Config_t **) data);
1183 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1184 }
1185 if (retcode) {
1186 RF_Free(k_cfg, sizeof(RF_Config_t));
1187 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1188 retcode));
1189 goto no_config;
1190 }
1191 goto config;
1192 config:
1193 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1194
1195 /* allocate a buffer for the layout-specific data, and copy it
1196 * in */
1197 if (k_cfg->layoutSpecificSize) {
1198 if (k_cfg->layoutSpecificSize > 10000) {
1199 /* sanity check */
1200 RF_Free(k_cfg, sizeof(RF_Config_t));
1201 retcode = EINVAL;
1202 goto no_config;
1203 }
1204 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1205 (u_char *));
1206 if (specific_buf == NULL) {
1207 RF_Free(k_cfg, sizeof(RF_Config_t));
1208 retcode = ENOMEM;
1209 goto no_config;
1210 }
1211 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1212 k_cfg->layoutSpecificSize);
1213 if (retcode) {
1214 RF_Free(k_cfg, sizeof(RF_Config_t));
1215 RF_Free(specific_buf,
1216 k_cfg->layoutSpecificSize);
1217 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1218 retcode));
1219 goto no_config;
1220 }
1221 } else
1222 specific_buf = NULL;
1223 k_cfg->layoutSpecific = specific_buf;
1224
1225 /* should do some kind of sanity check on the configuration.
1226 * Store the sum of all the bytes in the last byte? */
1227
1228 /* configure the system */
1229
1230 /*
1231 * Clear the entire RAID descriptor, just to make sure
1232 * there is no stale data left in the case of a
1233 * reconfiguration
1234 */
1235 memset(raidPtr, 0, sizeof(*raidPtr));
1236 raidPtr->softc = rs;
1237 raidPtr->raidid = unit;
1238
1239 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1240
1241 if (retcode == 0) {
1242
1243 /* allow this many simultaneous IO's to
1244 this RAID device */
1245 raidPtr->openings = RAIDOUTSTANDING;
1246
1247 raidinit(rs);
1248 raid_wakeup(raidPtr);
1249 rf_markalldirty(raidPtr);
1250 }
1251 /* free the buffers. No return code here. */
1252 if (k_cfg->layoutSpecificSize) {
1253 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1254 }
1255 RF_Free(k_cfg, sizeof(RF_Config_t));
1256
1257 no_config:
1258 /*
1259 * If configuration failed, set sc_flags so that we
1260 * will detach the device when we close it.
1261 */
1262 if (retcode != 0)
1263 rs->sc_flags |= RAIDF_SHUTDOWN;
1264 return (retcode);
1265
1266 /* shutdown the system */
1267 case RAIDFRAME_SHUTDOWN:
1268
1269 part = DISKPART(dev);
1270 pmask = (1 << part);
1271
1272 if ((error = raidlock(rs)) != 0)
1273 return (error);
1274
1275 if (DK_BUSY(dksc, pmask) ||
1276 raidPtr->recon_in_progress != 0 ||
1277 raidPtr->parity_rewrite_in_progress != 0 ||
1278 raidPtr->copyback_in_progress != 0)
1279 retcode = EBUSY;
1280 else {
1281 /* detach and free on close */
1282 rs->sc_flags |= RAIDF_SHUTDOWN;
1283 retcode = 0;
1284 }
1285
1286 raidunlock(rs);
1287
1288 return (retcode);
1289 case RAIDFRAME_GET_COMPONENT_LABEL:
1290 return rf_get_component_label(raidPtr, data);
1291
1292 #if 0
1293 case RAIDFRAME_SET_COMPONENT_LABEL:
1294 clabel = (RF_ComponentLabel_t *) data;
1295
1296 /* XXX check the label for valid stuff... */
1297 /* Note that some things *should not* get modified --
1298 the user should be re-initing the labels instead of
1299 trying to patch things.
1300 */
1301
1302 raidid = raidPtr->raidid;
1303 #ifdef DEBUG
1304 printf("raid%d: Got component label:\n", raidid);
1305 printf("raid%d: Version: %d\n", raidid, clabel->version);
1306 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1307 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1308 printf("raid%d: Column: %d\n", raidid, clabel->column);
1309 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1310 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1311 printf("raid%d: Status: %d\n", raidid, clabel->status);
1312 #endif
1313 clabel->row = 0;
1314 column = clabel->column;
1315
1316 if ((column < 0) || (column >= raidPtr->numCol)) {
1317 return(EINVAL);
1318 }
1319
1320 /* XXX this isn't allowed to do anything for now :-) */
1321
1322 /* XXX and before it is, we need to fill in the rest
1323 of the fields!?!?!?! */
1324 memcpy(raidget_component_label(raidPtr, column),
1325 clabel, sizeof(*clabel));
1326 raidflush_component_label(raidPtr, column);
1327 return (0);
1328 #endif
1329
1330 case RAIDFRAME_INIT_LABELS:
1331 clabel = (RF_ComponentLabel_t *) data;
1332 /*
1333 we only want the serial number from
1334 the above. We get all the rest of the information
1335 from the config that was used to create this RAID
1336 set.
1337 */
1338
1339 raidPtr->serial_number = clabel->serial_number;
1340
1341 for(column=0;column<raidPtr->numCol;column++) {
1342 diskPtr = &raidPtr->Disks[column];
1343 if (!RF_DEAD_DISK(diskPtr->status)) {
1344 ci_label = raidget_component_label(raidPtr,
1345 column);
1346 /* Zeroing this is important. */
1347 memset(ci_label, 0, sizeof(*ci_label));
1348 raid_init_component_label(raidPtr, ci_label);
1349 ci_label->serial_number =
1350 raidPtr->serial_number;
1351 ci_label->row = 0; /* we dont' pretend to support more */
1352 rf_component_label_set_partitionsize(ci_label,
1353 diskPtr->partitionSize);
1354 ci_label->column = column;
1355 raidflush_component_label(raidPtr, column);
1356 }
1357 /* XXXjld what about the spares? */
1358 }
1359
1360 return (retcode);
1361 case RAIDFRAME_SET_AUTOCONFIG:
1362 d = rf_set_autoconfig(raidPtr, *(int *) data);
1363 printf("raid%d: New autoconfig value is: %d\n",
1364 raidPtr->raidid, d);
1365 *(int *) data = d;
1366 return (retcode);
1367
1368 case RAIDFRAME_SET_ROOT:
1369 d = rf_set_rootpartition(raidPtr, *(int *) data);
1370 printf("raid%d: New rootpartition value is: %d\n",
1371 raidPtr->raidid, d);
1372 *(int *) data = d;
1373 return (retcode);
1374
1375 /* initialize all parity */
1376 case RAIDFRAME_REWRITEPARITY:
1377
1378 if (raidPtr->Layout.map->faultsTolerated == 0) {
1379 /* Parity for RAID 0 is trivially correct */
1380 raidPtr->parity_good = RF_RAID_CLEAN;
1381 return(0);
1382 }
1383
1384 if (raidPtr->parity_rewrite_in_progress == 1) {
1385 /* Re-write is already in progress! */
1386 return(EINVAL);
1387 }
1388
1389 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1390 rf_RewriteParityThread,
1391 raidPtr,"raid_parity");
1392 return (retcode);
1393
1394
1395 case RAIDFRAME_ADD_HOT_SPARE:
1396 sparePtr = (RF_SingleComponent_t *) data;
1397 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1398 retcode = rf_add_hot_spare(raidPtr, &component);
1399 return(retcode);
1400
1401 case RAIDFRAME_REMOVE_HOT_SPARE:
1402 return(retcode);
1403
1404 case RAIDFRAME_DELETE_COMPONENT:
1405 componentPtr = (RF_SingleComponent_t *)data;
1406 memcpy( &component, componentPtr,
1407 sizeof(RF_SingleComponent_t));
1408 retcode = rf_delete_component(raidPtr, &component);
1409 return(retcode);
1410
1411 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1412 componentPtr = (RF_SingleComponent_t *)data;
1413 memcpy( &component, componentPtr,
1414 sizeof(RF_SingleComponent_t));
1415 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1416 return(retcode);
1417
1418 case RAIDFRAME_REBUILD_IN_PLACE:
1419
1420 if (raidPtr->Layout.map->faultsTolerated == 0) {
1421 /* Can't do this on a RAID 0!! */
1422 return(EINVAL);
1423 }
1424
1425 if (raidPtr->recon_in_progress == 1) {
1426 /* a reconstruct is already in progress! */
1427 return(EINVAL);
1428 }
1429
1430 componentPtr = (RF_SingleComponent_t *) data;
1431 memcpy( &component, componentPtr,
1432 sizeof(RF_SingleComponent_t));
1433 component.row = 0; /* we don't support any more */
1434 column = component.column;
1435
1436 if ((column < 0) || (column >= raidPtr->numCol)) {
1437 return(EINVAL);
1438 }
1439
1440 rf_lock_mutex2(raidPtr->mutex);
1441 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1442 (raidPtr->numFailures > 0)) {
1443 /* XXX 0 above shouldn't be constant!!! */
1444 /* some component other than this has failed.
1445 Let's not make things worse than they already
1446 are... */
1447 printf("raid%d: Unable to reconstruct to disk at:\n",
1448 raidPtr->raidid);
1449 printf("raid%d: Col: %d Too many failures.\n",
1450 raidPtr->raidid, column);
1451 rf_unlock_mutex2(raidPtr->mutex);
1452 return (EINVAL);
1453 }
1454 if (raidPtr->Disks[column].status ==
1455 rf_ds_reconstructing) {
1456 printf("raid%d: Unable to reconstruct to disk at:\n",
1457 raidPtr->raidid);
1458 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1459
1460 rf_unlock_mutex2(raidPtr->mutex);
1461 return (EINVAL);
1462 }
1463 if (raidPtr->Disks[column].status == rf_ds_spared) {
1464 rf_unlock_mutex2(raidPtr->mutex);
1465 return (EINVAL);
1466 }
1467 rf_unlock_mutex2(raidPtr->mutex);
1468
1469 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1470 if (rrint == NULL)
1471 return(ENOMEM);
1472
1473 rrint->col = column;
1474 rrint->raidPtr = raidPtr;
1475
1476 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1477 rf_ReconstructInPlaceThread,
1478 rrint, "raid_reconip");
1479 return(retcode);
1480
1481 case RAIDFRAME_GET_INFO:
1482 #ifdef COMPAT_NETBSD32
1483 #ifdef _LP64
1484 case RAIDFRAME_GET_INFO32:
1485 #endif
1486 #endif
1487 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1488 (RF_DeviceConfig_t *));
1489 if (d_cfg == NULL)
1490 return (ENOMEM);
1491 retcode = rf_get_info(raidPtr, d_cfg);
1492 if (retcode == 0) {
1493 #ifdef COMPAT_NETBSD32
1494 #ifdef _LP64
1495 if (cmd == RAIDFRAME_GET_INFO32)
1496 ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1497 else
1498 #endif
1499 #endif
1500 ucfgp = *(RF_DeviceConfig_t **)data;
1501 retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1502 }
1503 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1504
1505 return (retcode);
1506
1507 case RAIDFRAME_CHECK_PARITY:
1508 *(int *) data = raidPtr->parity_good;
1509 return (0);
1510
1511 case RAIDFRAME_PARITYMAP_STATUS:
1512 if (rf_paritymap_ineligible(raidPtr))
1513 return EINVAL;
1514 rf_paritymap_status(raidPtr->parity_map,
1515 (struct rf_pmstat *)data);
1516 return 0;
1517
1518 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1519 if (rf_paritymap_ineligible(raidPtr))
1520 return EINVAL;
1521 if (raidPtr->parity_map == NULL)
1522 return ENOENT; /* ??? */
1523 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1524 (struct rf_pmparams *)data, 1))
1525 return EINVAL;
1526 return 0;
1527
1528 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1529 if (rf_paritymap_ineligible(raidPtr))
1530 return EINVAL;
1531 *(int *) data = rf_paritymap_get_disable(raidPtr);
1532 return 0;
1533
1534 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1535 if (rf_paritymap_ineligible(raidPtr))
1536 return EINVAL;
1537 rf_paritymap_set_disable(raidPtr, *(int *)data);
1538 /* XXX should errors be passed up? */
1539 return 0;
1540
1541 case RAIDFRAME_RESET_ACCTOTALS:
1542 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1543 return (0);
1544
1545 case RAIDFRAME_GET_ACCTOTALS:
1546 totals = (RF_AccTotals_t *) data;
1547 *totals = raidPtr->acc_totals;
1548 return (0);
1549
1550 case RAIDFRAME_KEEP_ACCTOTALS:
1551 raidPtr->keep_acc_totals = *(int *)data;
1552 return (0);
1553
1554 case RAIDFRAME_GET_SIZE:
1555 *(int *) data = raidPtr->totalSectors;
1556 return (0);
1557
1558 /* fail a disk & optionally start reconstruction */
1559 case RAIDFRAME_FAIL_DISK80:
1560 /* Check if we called compat code for this cmd */
1561 if (retcode != EPASSTHROUGH)
1562 return EINVAL;
1563 /* FALLTHRU */
1564 case RAIDFRAME_FAIL_DISK:
1565 if (raidPtr->Layout.map->faultsTolerated == 0) {
1566 /* Can't do this on a RAID 0!! */
1567 return(EINVAL);
1568 }
1569
1570 rr = (struct rf_recon_req *) data;
1571 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1572 return (EINVAL);
1573
1574 rf_lock_mutex2(raidPtr->mutex);
1575 if (raidPtr->status == rf_rs_reconstructing) {
1576 /* you can't fail a disk while we're reconstructing! */
1577 /* XXX wrong for RAID6 */
1578 rf_unlock_mutex2(raidPtr->mutex);
1579 return (EINVAL);
1580 }
1581 if ((raidPtr->Disks[rr->col].status ==
1582 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1583 /* some other component has failed. Let's not make
1584 things worse. XXX wrong for RAID6 */
1585 rf_unlock_mutex2(raidPtr->mutex);
1586 return (EINVAL);
1587 }
1588 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1589 /* Can't fail a spared disk! */
1590 rf_unlock_mutex2(raidPtr->mutex);
1591 return (EINVAL);
1592 }
1593 rf_unlock_mutex2(raidPtr->mutex);
1594
1595 /* make a copy of the recon request so that we don't rely on
1596 * the user's buffer */
1597 RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1598 if (rrint == NULL)
1599 return(ENOMEM);
1600 rrint->col = rr->col;
1601 rrint->flags = rr->flags;
1602 rrint->raidPtr = raidPtr;
1603
1604 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1605 rf_ReconThread,
1606 rrint, "raid_recon");
1607 return (0);
1608
1609 /* invoke a copyback operation after recon on whatever disk
1610 * needs it, if any */
1611 case RAIDFRAME_COPYBACK:
1612
1613 if (raidPtr->Layout.map->faultsTolerated == 0) {
1614 /* This makes no sense on a RAID 0!! */
1615 return(EINVAL);
1616 }
1617
1618 if (raidPtr->copyback_in_progress == 1) {
1619 /* Copyback is already in progress! */
1620 return(EINVAL);
1621 }
1622
1623 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1624 rf_CopybackThread,
1625 raidPtr,"raid_copyback");
1626 return (retcode);
1627
1628 /* return the percentage completion of reconstruction */
1629 case RAIDFRAME_CHECK_RECON_STATUS:
1630 if (raidPtr->Layout.map->faultsTolerated == 0) {
1631 /* This makes no sense on a RAID 0, so tell the
1632 user it's done. */
1633 *(int *) data = 100;
1634 return(0);
1635 }
1636 if (raidPtr->status != rf_rs_reconstructing)
1637 *(int *) data = 100;
1638 else {
1639 if (raidPtr->reconControl->numRUsTotal > 0) {
1640 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1641 } else {
1642 *(int *) data = 0;
1643 }
1644 }
1645 return (0);
1646 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1647 rf_check_recon_status_ext(raidPtr, data);
1648 return (0);
1649
1650 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1651 if (raidPtr->Layout.map->faultsTolerated == 0) {
1652 /* This makes no sense on a RAID 0, so tell the
1653 user it's done. */
1654 *(int *) data = 100;
1655 return(0);
1656 }
1657 if (raidPtr->parity_rewrite_in_progress == 1) {
1658 *(int *) data = 100 *
1659 raidPtr->parity_rewrite_stripes_done /
1660 raidPtr->Layout.numStripe;
1661 } else {
1662 *(int *) data = 100;
1663 }
1664 return (0);
1665
1666 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1667 rf_check_parityrewrite_status_ext(raidPtr, data);
1668 return (0);
1669
1670 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1671 if (raidPtr->Layout.map->faultsTolerated == 0) {
1672 /* This makes no sense on a RAID 0 */
1673 *(int *) data = 100;
1674 return(0);
1675 }
1676 if (raidPtr->copyback_in_progress == 1) {
1677 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1678 raidPtr->Layout.numStripe;
1679 } else {
1680 *(int *) data = 100;
1681 }
1682 return (0);
1683
1684 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1685 rf_check_copyback_status_ext(raidPtr, data);
1686 return 0;
1687
1688 case RAIDFRAME_SET_LAST_UNIT:
1689 for (column = 0; column < raidPtr->numCol; column++)
1690 if (raidPtr->Disks[column].status != rf_ds_optimal)
1691 return EBUSY;
1692
1693 for (column = 0; column < raidPtr->numCol; column++) {
1694 clabel = raidget_component_label(raidPtr, column);
1695 clabel->last_unit = *(int *)data;
1696 raidflush_component_label(raidPtr, column);
1697 }
1698 rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1699 return 0;
1700
1701 /* the sparetable daemon calls this to wait for the kernel to
1702 * need a spare table. this ioctl does not return until a
1703 * spare table is needed. XXX -- calling mpsleep here in the
1704 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1705 * -- I should either compute the spare table in the kernel,
1706 * or have a different -- XXX XXX -- interface (a different
1707 * character device) for delivering the table -- XXX */
1708 #if 0
1709 case RAIDFRAME_SPARET_WAIT:
1710 rf_lock_mutex2(rf_sparet_wait_mutex);
1711 while (!rf_sparet_wait_queue)
1712 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1713 waitreq = rf_sparet_wait_queue;
1714 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1715 rf_unlock_mutex2(rf_sparet_wait_mutex);
1716
1717 /* structure assignment */
1718 *((RF_SparetWait_t *) data) = *waitreq;
1719
1720 RF_Free(waitreq, sizeof(*waitreq));
1721 return (0);
1722
1723 /* wakes up a process waiting on SPARET_WAIT and puts an error
1724 * code in it that will cause the dameon to exit */
1725 case RAIDFRAME_ABORT_SPARET_WAIT:
1726 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1727 waitreq->fcol = -1;
1728 rf_lock_mutex2(rf_sparet_wait_mutex);
1729 waitreq->next = rf_sparet_wait_queue;
1730 rf_sparet_wait_queue = waitreq;
1731 rf_broadcast_conf2(rf_sparet_wait_cv);
1732 rf_unlock_mutex2(rf_sparet_wait_mutex);
1733 return (0);
1734
1735 /* used by the spare table daemon to deliver a spare table
1736 * into the kernel */
1737 case RAIDFRAME_SEND_SPARET:
1738
1739 /* install the spare table */
1740 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1741
1742 /* respond to the requestor. the return status of the spare
1743 * table installation is passed in the "fcol" field */
1744 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1745 waitreq->fcol = retcode;
1746 rf_lock_mutex2(rf_sparet_wait_mutex);
1747 waitreq->next = rf_sparet_resp_queue;
1748 rf_sparet_resp_queue = waitreq;
1749 rf_broadcast_cond2(rf_sparet_resp_cv);
1750 rf_unlock_mutex2(rf_sparet_wait_mutex);
1751
1752 return (retcode);
1753 #endif
1754
1755 default:
1756 break; /* fall through to the os-specific code below */
1757
1758 }
1759
1760 if (!raidPtr->valid)
1761 return (EINVAL);
1762
1763 /*
1764 * Add support for "regular" device ioctls here.
1765 */
1766
1767 switch (cmd) {
1768 case DIOCGCACHE:
1769 retcode = rf_get_component_caches(raidPtr, (int *)data);
1770 break;
1771
1772 case DIOCCACHESYNC:
1773 retcode = rf_sync_component_caches(raidPtr);
1774 break;
1775
1776 default:
1777 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1778 break;
1779 }
1780
1781 return (retcode);
1782
1783 }
1784
1785
1786 /* raidinit -- complete the rest of the initialization for the
1787 RAIDframe device. */
1788
1789
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed: free the cfdata and bail without ever
		 * setting RAIDF_INITED, so the unit stays unusable. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* FCFS queue; RAIDframe does its own request ordering. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe for wedges (GPT partitions etc.) on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1845
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Post the request on the wait queue and wake the daemon
	 * (which sleeps in RAIDFRAME_SPARET_WAIT). */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* Block until the daemon delivers a response via
	 * RAIDFRAME_SEND_SPARET (which queues on rf_sparet_resp_queue). */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* fcol carries the daemon's status code for the installation. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif
1880
1881 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1882 * bp & passes it down.
1883 * any calls originating in the kernel must use non-blocking I/O
1884 * do some extra sanity checking to return "appropriate" error values for
1885 * certain conditions (to make some standard utilities work)
1886 *
1887 * Formerly known as: rf_DoAccessKernel
1888 */
1889 void
1890 raidstart(RF_Raid_t *raidPtr)
1891 {
1892 struct raid_softc *rs;
1893 struct dk_softc *dksc;
1894
1895 rs = raidPtr->softc;
1896 dksc = &rs->sc_dksc;
1897 /* quick check to see if anything has died recently */
1898 rf_lock_mutex2(raidPtr->mutex);
1899 if (raidPtr->numNewFailures > 0) {
1900 rf_unlock_mutex2(raidPtr->mutex);
1901 rf_update_component_labels(raidPtr,
1902 RF_NORMAL_COMPONENT_UPDATE);
1903 rf_lock_mutex2(raidPtr->mutex);
1904 raidPtr->numNewFailures--;
1905 }
1906 rf_unlock_mutex2(raidPtr->mutex);
1907
1908 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1909 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1910 return;
1911 }
1912
1913 dk_start(dksc, NULL);
1914 }
1915
/*
 * Hand one buf from the dk queue to RAIDframe.  Returns EAGAIN when no
 * openings are available (caller retries later), ENOSPC for requests
 * that fall outside or are not a whole multiple of the sector size,
 * otherwise the result of rf_DoAccess().
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int     do_async;
	int rc;

	/* An "opening" is a slot for one outstanding I/O; bail early
	 * if they are all in use. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb accounts for a trailing partial sector; sum is used both
	 * for the bounds check and (via the < comparisons) to detect
	 * arithmetic wraparound. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		/* NOTE(review): the "1 ||" forces this debug print path
		 * unconditionally; looks like leftover debugging — confirm
		 * before removing (db1_printf may be compiled out anyway). */
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject requests that are not a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening; it is returned when the I/O completes. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1988
1989 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1990
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* NOP completes immediately: route it straight to the
		 * completion callback. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device; completion is
		 * delivered via KernelWakeupFunc (b_iodone). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2064 /* this is the callback function associated with a I/O invoked from
2065 kernel code.
2066 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by
	 * rf_DispatchKernelIO()/InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is consumed by raidstart() to
			 * trigger a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2133
2134
2135 /*
2136 * initialize a buf structure for doing an I/O in the kernel.
2137 */
2138 static void
2139 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2140 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2141 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2142 struct proc *b_proc)
2143 {
2144 /* bp->b_flags = B_PHYS | rw_flag; */
2145 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2146 bp->b_oflags = 0;
2147 bp->b_cflags = 0;
2148 bp->b_bcount = numSect << logBytesPerSector;
2149 bp->b_bufsize = bp->b_bcount;
2150 bp->b_error = 0;
2151 bp->b_dev = dev;
2152 bp->b_data = bf;
2153 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2154 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2155 if (bp->b_bcount == 0) {
2156 panic("bp->b_bcount is zero in InitBP!!");
2157 }
2158 bp->b_proc = b_proc;
2159 bp->b_iodone = cbFunc;
2160 bp->b_private = cbArg;
2161 }
2162
2163 /*
2164 * Wait interruptibly for an exclusive lock.
2165 *
2166 * XXX
2167 * Several drivers do this; it should be abstracted and made MP-safe.
2168 * (Hmm... where have we seen this warning before :-> GO )
2169 */
2170 static int
2171 raidlock(struct raid_softc *rs)
2172 {
2173 int error;
2174
2175 error = 0;
2176 mutex_enter(&rs->sc_mutex);
2177 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2178 rs->sc_flags |= RAIDF_WANTED;
2179 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2180 if (error != 0)
2181 goto done;
2182 }
2183 rs->sc_flags |= RAIDF_LOCKED;
2184 done:
2185 mutex_exit(&rs->sc_mutex);
2186 return (error);
2187 }
2188 /*
2189 * Unlock and wake up any waiters.
2190 */
2191 static void
2192 raidunlock(struct raid_softc *rs)
2193 {
2194
2195 mutex_enter(&rs->sc_mutex);
2196 rs->sc_flags &= ~RAIDF_LOCKED;
2197 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2198 rs->sc_flags &= ~RAIDF_WANTED;
2199 cv_broadcast(&rs->sc_cv);
2200 }
2201 mutex_exit(&rs->sc_mutex);
2202 }
2203
2204
2205 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2206 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2207 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2208
/*
 * Byte offset of the component label area on each component.
 * Currently a fixed constant (RF_COMPONENT_INFO_OFFSET); kept as a
 * function so the policy has a single point of change.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2215
2216 static daddr_t
2217 rf_component_info_size(unsigned secsize)
2218 {
2219 daddr_t info_size;
2220
2221 KASSERT(secsize);
2222 if (secsize > RF_COMPONENT_INFO_SIZE)
2223 info_size = secsize;
2224 else
2225 info_size = RF_COMPONENT_INFO_SIZE;
2226
2227 return info_size;
2228 }
2229
2230 static daddr_t
2231 rf_parity_map_offset(RF_Raid_t *raidPtr)
2232 {
2233 daddr_t map_offset;
2234
2235 KASSERT(raidPtr->bytesPerSector);
2236 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2237 map_offset = raidPtr->bytesPerSector;
2238 else
2239 map_offset = RF_COMPONENT_INFO_SIZE;
2240 map_offset += rf_component_info_offset();
2241
2242 return map_offset;
2243 }
2244
2245 static daddr_t
2246 rf_parity_map_size(RF_Raid_t *raidPtr)
2247 {
2248 daddr_t map_size;
2249
2250 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2251 map_size = raidPtr->bytesPerSector;
2252 else
2253 map_size = RF_PARITY_MAP_SIZE;
2254
2255 return map_size;
2256 }
2257
2258 int
2259 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2260 {
2261 RF_ComponentLabel_t *clabel;
2262
2263 clabel = raidget_component_label(raidPtr, col);
2264 clabel->clean = RF_RAID_CLEAN;
2265 raidflush_component_label(raidPtr, col);
2266 return(0);
2267 }
2268
2269
2270 int
2271 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2272 {
2273 RF_ComponentLabel_t *clabel;
2274
2275 clabel = raidget_component_label(raidPtr, col);
2276 clabel->clean = RF_RAID_DIRTY;
2277 raidflush_component_label(raidPtr, col);
2278 return(0);
2279 }
2280
/*
 * Read the on-disk component label for column `col' into the in-core
 * copy kept in raid_cinfo[col].ci_label.  Returns 0 on success or an
 * errno from the underlying read.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2290
/*
 * Return a pointer to the in-core component label for column `col'.
 * The caller may modify it and then push it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2296
2297 int
2298 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2299 {
2300 RF_ComponentLabel_t *label;
2301
2302 label = &raidPtr->raid_cinfo[col].ci_label;
2303 label->mod_counter = raidPtr->mod_counter;
2304 #ifndef RF_NO_PARITY_MAP
2305 label->parity_map_modcount = label->mod_counter;
2306 #endif
2307 return raidwrite_component_label(raidPtr->bytesPerSector,
2308 raidPtr->Disks[col].dev,
2309 raidPtr->raid_cinfo[col].ci_vp, label);
2310 }
2311
2312
/*
 * Read a component label from the fixed label area of the given
 * component device into `clabel'.  Returns 0 on success or an errno.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2322
/*
 * Synchronously read `dsize' bytes of the reserved component area at
 * byte offset `offset' on the raw component device, then copy the
 * first `msize' bytes of the result into `data'.  Returns 0 on
 * success, EINVAL for a component with no vnode, or an errno from the
 * I/O.
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	/* offsets are byte counts; convert to DEV_BSIZE block units */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2360
2361
/*
 * Write a component label into the fixed label area of the given
 * component device (synchronously).  Returns 0 on success or an errno.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2371
/*
 * Write `msize' bytes from `data' into the reserved component area at
 * byte offset `offset' on the raw component device, zero-padding the
 * transfer out to `dsize' bytes.  If `asyncp' is set the write is
 * issued B_ASYNC and 0 is returned immediately; otherwise the routine
 * waits for completion and returns 0 or an errno.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	/* offsets are byte counts; convert to DEV_BSIZE block units */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* pad the area with zeros so a short `msize' never leaks
	   stale buffer contents to disk */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): the async path never brelse()s or checks
		   the buffer here; presumably completion handling releases
		   it -- verify against the buffer-cache contract. */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2406
2407 void
2408 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2409 {
2410 int c;
2411
2412 for (c = 0; c < raidPtr->numCol; c++) {
2413 /* Skip dead disks. */
2414 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2415 continue;
2416 /* XXXjld: what if an error occurs here? */
2417 raidwrite_component_area(raidPtr->Disks[c].dev,
2418 raidPtr->raid_cinfo[c].ci_vp, map,
2419 RF_PARITYMAP_NBYTE,
2420 rf_parity_map_offset(raidPtr),
2421 rf_parity_map_size(raidPtr), 0);
2422 }
2423 }
2424
2425 void
2426 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2427 {
2428 struct rf_paritymap_ondisk tmp;
2429 int c,first;
2430
2431 first=1;
2432 for (c = 0; c < raidPtr->numCol; c++) {
2433 /* Skip dead disks. */
2434 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2435 continue;
2436 raidread_component_area(raidPtr->Disks[c].dev,
2437 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2438 RF_PARITYMAP_NBYTE,
2439 rf_parity_map_offset(raidPtr),
2440 rf_parity_map_size(raidPtr));
2441 if (first) {
2442 memcpy(map, &tmp, sizeof(*map));
2443 first = 0;
2444 } else {
2445 rf_paritymap_merge(map, &tmp);
2446 }
2447 }
2448 }
2449
/*
 * Bump the array's mod_counter and mark every usable component (and
 * every in-use spare) dirty on disk, so that an unclean shutdown can
 * be detected later.  Failed disks and spared-out components are left
 * untouched.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column maps to this spare,
			   scol retains its previous value (initially -1)
			   and that is what ends up in the label -- verify
			   that cannot happen for rf_ds_used_spare. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2509
2510
/*
 * Refresh the component labels of all optimal components and all
 * in-use spares: bump the mod_counter, record the current unit number
 * (unless the unit changed), and, on a final update with clean parity,
 * also set the clean bit.  `final' is RF_FINAL_COMPONENT_UPDATE at
 * shutdown/unconfigure time.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare replaced; see
			   the matching NOTE in rf_markalldirty() about
			   scol possibly staying -1 */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2588
2589 void
2590 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2591 {
2592
2593 if (vp != NULL) {
2594 if (auto_configured == 1) {
2595 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2596 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2597 vput(vp);
2598
2599 } else {
2600 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2601 }
2602 }
2603 }
2604
2605
2606 void
2607 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2608 {
2609 int r,c;
2610 struct vnode *vp;
2611 int acd;
2612
2613
2614 /* We take this opportunity to close the vnodes like we should.. */
2615
2616 for (c = 0; c < raidPtr->numCol; c++) {
2617 vp = raidPtr->raid_cinfo[c].ci_vp;
2618 acd = raidPtr->Disks[c].auto_configured;
2619 rf_close_component(raidPtr, vp, acd);
2620 raidPtr->raid_cinfo[c].ci_vp = NULL;
2621 raidPtr->Disks[c].auto_configured = 0;
2622 }
2623
2624 for (r = 0; r < raidPtr->numSpare; r++) {
2625 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2626 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2627 rf_close_component(raidPtr, vp, acd);
2628 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2629 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2630 }
2631 }
2632
2633
2634 void
2635 rf_ReconThread(struct rf_recon_req_internal *req)
2636 {
2637 int s;
2638 RF_Raid_t *raidPtr;
2639
2640 s = splbio();
2641 raidPtr = (RF_Raid_t *) req->raidPtr;
2642 raidPtr->recon_in_progress = 1;
2643
2644 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2645 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2646
2647 RF_Free(req, sizeof(*req));
2648
2649 raidPtr->recon_in_progress = 0;
2650 splx(s);
2651
2652 /* That's all... */
2653 kthread_exit(0); /* does not return */
2654 }
2655
2656 void
2657 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2658 {
2659 int retcode;
2660 int s;
2661
2662 raidPtr->parity_rewrite_stripes_done = 0;
2663 raidPtr->parity_rewrite_in_progress = 1;
2664 s = splbio();
2665 retcode = rf_RewriteParity(raidPtr);
2666 splx(s);
2667 if (retcode) {
2668 printf("raid%d: Error re-writing parity (%d)!\n",
2669 raidPtr->raidid, retcode);
2670 } else {
2671 /* set the clean bit! If we shutdown correctly,
2672 the clean bit on each component label will get
2673 set */
2674 raidPtr->parity_good = RF_RAID_CLEAN;
2675 }
2676 raidPtr->parity_rewrite_in_progress = 0;
2677
2678 /* Anyone waiting for us to stop? If so, inform them... */
2679 if (raidPtr->waitShutdown) {
2680 rf_lock_mutex2(raidPtr->rad_lock);
2681 cv_broadcast(&raidPtr->parity_rewrite_cv);
2682 rf_unlock_mutex2(raidPtr->rad_lock);
2683 }
2684
2685 /* That's all... */
2686 kthread_exit(0); /* does not return */
2687 }
2688
2689
2690 void
2691 rf_CopybackThread(RF_Raid_t *raidPtr)
2692 {
2693 int s;
2694
2695 raidPtr->copyback_in_progress = 1;
2696 s = splbio();
2697 rf_CopybackReconstructedData(raidPtr);
2698 splx(s);
2699 raidPtr->copyback_in_progress = 0;
2700
2701 /* That's all... */
2702 kthread_exit(0); /* does not return */
2703 }
2704
2705
2706 void
2707 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2708 {
2709 int s;
2710 RF_Raid_t *raidPtr;
2711
2712 s = splbio();
2713 raidPtr = req->raidPtr;
2714 raidPtr->recon_in_progress = 1;
2715 rf_ReconstructInPlace(raidPtr, req->col);
2716 RF_Free(req, sizeof(*req));
2717 raidPtr->recon_in_progress = 0;
2718 splx(s);
2719
2720 /* That's all... */
2721 kthread_exit(0); /* does not return */
2722 }
2723
/*
 * Probe one candidate device/partition for a RAIDframe component
 * label.  If a reasonable label is found, prepend a new RF_AutoConfig_t
 * (taking ownership of `vp') to `ac_list' and return the new list
 * head.  Otherwise free the label, close/release `vp', and return the
 * list unchanged.  On memory exhaustion the entire list is torn down
 * and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: discard everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no component here, release the vnode too */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2781
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of everything found.  The scan is made twice:
 * wedges (dk) first, then everything else, so that a wedge covering a
 * whole disk takes precedence over the disk's raw partition.  For
 * non-wedge disks each FS_RAID disklabel partition is probed; if none
 * is found, the raw partition itself is probed as a last resort.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* wedge pass: the wedge itself must be of
				   RAIDframe type; if so it is the component */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2985
2986
2987 int
2988 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2989 {
2990
2991 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2992 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2993 ((clabel->clean == RF_RAID_CLEAN) ||
2994 (clabel->clean == RF_RAID_DIRTY)) &&
2995 clabel->row >=0 &&
2996 clabel->column >= 0 &&
2997 clabel->num_rows > 0 &&
2998 clabel->num_columns > 0 &&
2999 clabel->row < clabel->num_rows &&
3000 clabel->column < clabel->num_columns &&
3001 clabel->blockSize > 0 &&
3002 /*
3003 * numBlocksHi may contain garbage, but it is ok since
3004 * the type is unsigned. If it is really garbage,
3005 * rf_fix_old_label_size() will fix it.
3006 */
3007 rf_component_label_numblocks(clabel) > 0) {
3008 /*
3009 * label looks reasonable enough...
3010 * let's make sure it has no old garbage.
3011 */
3012 if (numsecs)
3013 rf_fix_old_label_size(clabel, numsecs);
3014 return(1);
3015 }
3016 return(0);
3017 }
3018
3019
3020 /*
3021 * For reasons yet unknown, some old component labels have garbage in
3022 * the newer numBlocksHi region, and this causes lossage. Since those
3023 * disks will also have numsecs set to less than 32 bits of sectors,
3024 * we can determine when this corruption has occurred, and fix it.
3025 *
3026 * The exact same problem, with the same unknown reason, happens to
3027 * the partitionSizeHi member as well.
3028 */
3029 static void
3030 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3031 {
3032
3033 if (numsecs < ((uint64_t)1 << 32)) {
3034 if (clabel->numBlocksHi) {
3035 printf("WARNING: total sectors < 32 bits, yet "
3036 "numBlocksHi set\n"
3037 "WARNING: resetting numBlocksHi to zero.\n");
3038 clabel->numBlocksHi = 0;
3039 }
3040
3041 if (clabel->partitionSizeHi) {
3042 printf("WARNING: total sectors < 32 bits, yet "
3043 "partitionSizeHi set\n"
3044 "WARNING: resetting partitionSizeHi to zero.\n");
3045 clabel->partitionSizeHi = 0;
3046 }
3047 }
3048 }
3049
3050
#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * DEBUG kernels only; used by the autoconfiguration probe.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* index by (root_partition & 3): how this set may become root */
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3084
3085 RF_ConfigSet_t *
3086 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3087 {
3088 RF_AutoConfig_t *ac;
3089 RF_ConfigSet_t *config_sets;
3090 RF_ConfigSet_t *cset;
3091 RF_AutoConfig_t *ac_next;
3092
3093
3094 config_sets = NULL;
3095
3096 /* Go through the AutoConfig list, and figure out which components
3097 belong to what sets. */
3098 ac = ac_list;
3099 while(ac!=NULL) {
3100 /* we're going to putz with ac->next, so save it here
3101 for use at the end of the loop */
3102 ac_next = ac->next;
3103
3104 if (config_sets == NULL) {
3105 /* will need at least this one... */
3106 config_sets = (RF_ConfigSet_t *)
3107 malloc(sizeof(RF_ConfigSet_t),
3108 M_RAIDFRAME, M_NOWAIT);
3109 if (config_sets == NULL) {
3110 panic("rf_create_auto_sets: No memory!");
3111 }
3112 /* this one is easy :) */
3113 config_sets->ac = ac;
3114 config_sets->next = NULL;
3115 config_sets->rootable = 0;
3116 ac->next = NULL;
3117 } else {
3118 /* which set does this component fit into? */
3119 cset = config_sets;
3120 while(cset!=NULL) {
3121 if (rf_does_it_fit(cset, ac)) {
3122 /* looks like it matches... */
3123 ac->next = cset->ac;
3124 cset->ac = ac;
3125 break;
3126 }
3127 cset = cset->next;
3128 }
3129 if (cset==NULL) {
3130 /* didn't find a match above... new set..*/
3131 cset = (RF_ConfigSet_t *)
3132 malloc(sizeof(RF_ConfigSet_t),
3133 M_RAIDFRAME, M_NOWAIT);
3134 if (cset == NULL) {
3135 panic("rf_create_auto_sets: No memory!");
3136 }
3137 cset->ac = ac;
3138 ac->next = NULL;
3139 cset->next = config_sets;
3140 cset->rootable = 0;
3141 config_sets = cset;
3142 }
3143 }
3144 ac = ac_next;
3145 }
3146
3147
3148 return(config_sets);
3149 }
3150
3151 static int
3152 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3153 {
3154 RF_ComponentLabel_t *clabel1, *clabel2;
3155
3156 /* If this one matches the *first* one in the set, that's good
3157 enough, since the other members of the set would have been
3158 through here too... */
3159 /* note that we are not checking partitionSize here..
3160
3161 Note that we are also not checking the mod_counters here.
3162 If everything else matches except the mod_counter, that's
3163 good enough for this test. We will deal with the mod_counters
3164 a little later in the autoconfiguration process.
3165
3166 (clabel1->mod_counter == clabel2->mod_counter) &&
3167
3168 The reason we don't check for this is that failed disks
3169 will have lower modification counts. If those disks are
3170 not added to the set they used to belong to, then they will
3171 form their own set, which may result in 2 different sets,
3172 for example, competing to be configured at raid0, and
3173 perhaps competing to be the root filesystem set. If the
3174 wrong ones get configured, or both attempt to become /,
3175 weird behaviour and or serious lossage will occur. Thus we
3176 need to bring them into the fold here, and kick them out at
3177 a later point.
3178
3179 */
3180
3181 clabel1 = cset->ac->clabel;
3182 clabel2 = ac->clabel;
3183 if ((clabel1->version == clabel2->version) &&
3184 (clabel1->serial_number == clabel2->serial_number) &&
3185 (clabel1->num_rows == clabel2->num_rows) &&
3186 (clabel1->num_columns == clabel2->num_columns) &&
3187 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3188 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3189 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3190 (clabel1->parityConfig == clabel2->parityConfig) &&
3191 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3192 (clabel1->blockSize == clabel2->blockSize) &&
3193 rf_component_label_numblocks(clabel1) ==
3194 rf_component_label_numblocks(clabel2) &&
3195 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3196 (clabel1->root_partition == clabel2->root_partition) &&
3197 (clabel1->last_unit == clabel2->last_unit) &&
3198 (clabel1->config_order == clabel2->config_order)) {
3199 /* if it get's here, it almost *has* to be a match */
3200 } else {
3201 /* it's not consistent with somebody in the set..
3202 punt */
3203 return(0);
3204 }
3205 /* all was fine.. it must fit... */
3206 return(1);
3207 }
3208
/*
 * Decide whether a configuration set has enough live components to be
 * configured.  The reference mod_counter is the maximum seen across
 * the set; components with a stale counter are treated as missing.
 * RAID 1 is special-cased: components are paired (even/odd columns)
 * and the set survives unless both halves of some pair are missing.
 * For RAID 0 any missing component is fatal; RAID 4/5 tolerate one.
 * Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (the maximum across all members; failed disks lag behind) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a live, current component at column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3311
3312 void
3313 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3314 RF_Raid_t *raidPtr)
3315 {
3316 RF_ComponentLabel_t *clabel;
3317 int i;
3318
3319 clabel = ac->clabel;
3320
3321 /* 1. Fill in the common stuff */
3322 config->numCol = clabel->num_columns;
3323 config->numSpare = 0; /* XXX should this be set here? */
3324 config->sectPerSU = clabel->sectPerSU;
3325 config->SUsPerPU = clabel->SUsPerPU;
3326 config->SUsPerRU = clabel->SUsPerRU;
3327 config->parityConfig = clabel->parityConfig;
3328 /* XXX... */
3329 strcpy(config->diskQueueType,"fifo");
3330 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3331 config->layoutSpecificSize = 0; /* XXX ?? */
3332
3333 while(ac!=NULL) {
3334 /* row/col values will be in range due to the checks
3335 in reasonable_label() */
3336 strcpy(config->devnames[0][ac->clabel->column],
3337 ac->devname);
3338 ac = ac->next;
3339 }
3340
3341 for(i=0;i<RF_MAXDBGV;i++) {
3342 config->debugVars[i][0] = 0;
3343 }
3344 }
3345
3346 int
3347 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3348 {
3349 RF_ComponentLabel_t *clabel;
3350 int column;
3351 int sparecol;
3352
3353 raidPtr->autoconfigure = new_value;
3354
3355 for(column=0; column<raidPtr->numCol; column++) {
3356 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3357 clabel = raidget_component_label(raidPtr, column);
3358 clabel->autoconfigure = new_value;
3359 raidflush_component_label(raidPtr, column);
3360 }
3361 }
3362 for(column = 0; column < raidPtr->numSpare ; column++) {
3363 sparecol = raidPtr->numCol + column;
3364 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3365 clabel = raidget_component_label(raidPtr, sparecol);
3366 clabel->autoconfigure = new_value;
3367 raidflush_component_label(raidPtr, sparecol);
3368 }
3369 }
3370 return(new_value);
3371 }
3372
3373 int
3374 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3375 {
3376 RF_ComponentLabel_t *clabel;
3377 int column;
3378 int sparecol;
3379
3380 raidPtr->root_partition = new_value;
3381 for(column=0; column<raidPtr->numCol; column++) {
3382 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3383 clabel = raidget_component_label(raidPtr, column);
3384 clabel->root_partition = new_value;
3385 raidflush_component_label(raidPtr, column);
3386 }
3387 }
3388 for(column = 0; column < raidPtr->numSpare ; column++) {
3389 sparecol = raidPtr->numCol + column;
3390 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3391 clabel = raidget_component_label(raidPtr, sparecol);
3392 clabel->root_partition = new_value;
3393 raidflush_component_label(raidPtr, sparecol);
3394 }
3395 }
3396 return(new_value);
3397 }
3398
3399 void
3400 rf_release_all_vps(RF_ConfigSet_t *cset)
3401 {
3402 RF_AutoConfig_t *ac;
3403
3404 ac = cset->ac;
3405 while(ac!=NULL) {
3406 /* Close the vp, and give it back */
3407 if (ac->vp) {
3408 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3409 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3410 vput(ac->vp);
3411 ac->vp = NULL;
3412 }
3413 ac = ac->next;
3414 }
3415 }
3416
3417
3418 void
3419 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3420 {
3421 RF_AutoConfig_t *ac;
3422 RF_AutoConfig_t *next_ac;
3423
3424 ac = cset->ac;
3425 while(ac!=NULL) {
3426 next_ac = ac->next;
3427 /* nuke the label */
3428 free(ac->clabel, M_RAIDFRAME);
3429 /* cleanup the config structure */
3430 free(ac, M_RAIDFRAME);
3431 /* "next.." */
3432 ac = next_ac;
3433 }
3434 /* and, finally, nuke the config set */
3435 free(cset, M_RAIDFRAME);
3436 }
3437
3438
/*
 * Fill in *clabel from the live state of raidPtr, producing the label
 * that will be written to each component of this set.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* labels written here always describe a single-row set */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* stripe-unit geometry, copied from the active layout */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	/* remember which raid unit this set was configured as */
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3471
/*
 * Auto-configure one config set: allocate a softc at (preferably) the
 * unit number recorded in the component labels, build an RF_Config_t
 * from the labels, and configure the RAID set.  Returns the configured
 * softc, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/*
	 * Start at the unit recorded in the label and walk upward until
	 * we find a unit that is either unallocated or not yet valid.
	 */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	    sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No existing softc at that unit: allocate one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the softc we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3555
/*
 * Initialize pool *p for objects of the given size, pre-allocating
 * xmin items (a failure to do so is fatal) and capping the pool's
 * high-water mark at xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	int error;

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* Pre-populate the pool with xmin items up front. */
	if ((error = pool_prime(p, xmin)) != 0)
		panic("%s: failed to prime pool: %d", __func__, error);
	pool_setlowat(p, xmin);
}
3568
3569 /*
3570 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3571 * to see if there is IO pending and if that IO could possibly be done
3572 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3573 * otherwise.
3574 *
3575 */
3576 int
3577 rf_buf_queue_check(RF_Raid_t *raidPtr)
3578 {
3579 struct raid_softc *rs;
3580 struct dk_softc *dksc;
3581
3582 rs = raidPtr->softc;
3583 dksc = &rs->sc_dksc;
3584
3585 if ((rs->sc_flags & RAIDF_INITED) == 0)
3586 return 1;
3587
3588 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3589 /* there is work to do */
3590 return 0;
3591 }
3592 /* default is nothing to do */
3593 return 1;
3594 }
3595
3596 int
3597 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3598 {
3599 uint64_t numsecs;
3600 unsigned secsize;
3601 int error;
3602
3603 error = getdisksize(vp, &numsecs, &secsize);
3604 if (error == 0) {
3605 diskPtr->blockSize = secsize;
3606 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3607 diskPtr->partitionSize = numsecs;
3608 return 0;
3609 }
3610 return error;
3611 }
3612
/*
 * autoconf match function: the raid pseudo-device always matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3618
/*
 * autoconf attach function: nothing to do at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3623
3624
3625 static int
3626 raid_detach(device_t self, int flags)
3627 {
3628 int error;
3629 struct raid_softc *rs = raidsoftc(self);
3630
3631 if (rs == NULL)
3632 return ENXIO;
3633
3634 if ((error = raidlock(rs)) != 0)
3635 return (error);
3636
3637 error = raid_detach_unlocked(rs);
3638
3639 raidunlock(rs);
3640
3641 /* XXX raid can be referenced here */
3642
3643 if (error)
3644 return error;
3645
3646 /* Free the softc */
3647 raidput(rs);
3648
3649 return 0;
3650 }
3651
/*
 * Publish a synthetic disk geometry for the RAID set to the disk(9)
 * layer, derived from the array's total size and layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* synthetic track count: four tracks per column */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3667
3668 /*
3669 * Get cache info for all the components (including spares).
3670 * Returns intersection of all the cache flags of all disks, or first
3671 * error if any encountered.
3672 * XXXfua feature flags can change as spares are added - lock down somehow
3673 */
3674 static int
3675 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3676 {
3677 int c;
3678 int error;
3679 int dkwhole = 0, dkpart;
3680
3681 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3682 /*
3683 * Check any non-dead disk, even when currently being
3684 * reconstructed.
3685 */
3686 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3687 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3688 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3689 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3690 if (error) {
3691 if (error != ENODEV) {
3692 printf("raid%d: get cache for component %s failed\n",
3693 raidPtr->raidid,
3694 raidPtr->Disks[c].devname);
3695 }
3696
3697 return error;
3698 }
3699
3700 if (c == 0)
3701 dkwhole = dkpart;
3702 else
3703 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3704 }
3705 }
3706
3707 *data = dkwhole;
3708
3709 return 0;
3710 }
3711
3712 /*
3713 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3714 * We end up returning whatever error was returned by the first cache flush
3715 * that fails.
3716 */
3717
3718 int
3719 rf_sync_component_caches(RF_Raid_t *raidPtr)
3720 {
3721 int c, sparecol;
3722 int e,error;
3723 int force = 1;
3724
3725 error = 0;
3726 for (c = 0; c < raidPtr->numCol; c++) {
3727 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3728 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3729 &force, FWRITE, NOCRED);
3730 if (e) {
3731 if (e != ENODEV)
3732 printf("raid%d: cache flush to component %s failed.\n",
3733 raidPtr->raidid, raidPtr->Disks[c].devname);
3734 if (error == 0) {
3735 error = e;
3736 }
3737 }
3738 }
3739 }
3740
3741 for( c = 0; c < raidPtr->numSpare ; c++) {
3742 sparecol = raidPtr->numCol + c;
3743 /* Need to ensure that the reconstruct actually completed! */
3744 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3745 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3746 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3747 if (e) {
3748 if (e != ENODEV)
3749 printf("raid%d: cache flush to component %s failed.\n",
3750 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3751 if (error == 0) {
3752 error = e;
3753 }
3754 }
3755 }
3756 }
3757 return error;
3758 }
3759
3760 /* Fill in info with the current status */
3761 void
3762 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3763 {
3764
3765 if (raidPtr->status != rf_rs_reconstructing) {
3766 info->total = 100;
3767 info->completed = 100;
3768 } else {
3769 info->total = raidPtr->reconControl->numRUsTotal;
3770 info->completed = raidPtr->reconControl->numRUsComplete;
3771 }
3772 info->remaining = info->total - info->completed;
3773 }
3774
3775 /* Fill in info with the current status */
3776 void
3777 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3778 {
3779
3780 if (raidPtr->parity_rewrite_in_progress == 1) {
3781 info->total = raidPtr->Layout.numStripe;
3782 info->completed = raidPtr->parity_rewrite_stripes_done;
3783 } else {
3784 info->completed = 100;
3785 info->total = 100;
3786 }
3787 info->remaining = info->total - info->completed;
3788 }
3789
3790 /* Fill in info with the current status */
3791 void
3792 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3793 {
3794
3795 if (raidPtr->copyback_in_progress == 1) {
3796 info->total = raidPtr->Layout.numStripe;
3797 info->completed = raidPtr->copyback_stripes_done;
3798 info->remaining = info->total - info->completed;
3799 } else {
3800 info->remaining = 0;
3801 info->completed = 100;
3802 info->total = 100;
3803 }
3804 }
3805
3806 /* Fill in config with the current info */
3807 int
3808 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3809 {
3810 int d, i, j;
3811
3812 if (!raidPtr->valid)
3813 return (ENODEV);
3814 config->cols = raidPtr->numCol;
3815 config->ndevs = raidPtr->numCol;
3816 if (config->ndevs >= RF_MAX_DISKS)
3817 return (ENOMEM);
3818 config->nspares = raidPtr->numSpare;
3819 if (config->nspares >= RF_MAX_DISKS)
3820 return (ENOMEM);
3821 config->maxqdepth = raidPtr->maxQueueDepth;
3822 d = 0;
3823 for (j = 0; j < config->cols; j++) {
3824 config->devs[d] = raidPtr->Disks[j];
3825 d++;
3826 }
3827 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3828 config->spares[i] = raidPtr->Disks[j];
3829 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3830 /* XXX: raidctl(8) expects to see this as a used spare */
3831 config->spares[i].status = rf_ds_used_spare;
3832 }
3833 }
3834 return 0;
3835 }
3836
3837 int
3838 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3839 {
3840 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3841 RF_ComponentLabel_t *raid_clabel;
3842 int column = clabel->column;
3843
3844 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3845 return EINVAL;
3846 raid_clabel = raidget_component_label(raidPtr, column);
3847 memcpy(clabel, raid_clabel, sizeof *clabel);
3848
3849 return 0;
3850 }
3851
3852 /*
3853 * Module interface
3854 */
3855
3856 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3857
3858 #ifdef _MODULE
3859 CFDRIVER_DECL(raid, DV_DISK, NULL);
3860 #endif
3861
3862 static int raid_modcmd(modcmd_t, void *);
3863 static int raid_modcmd_init(void);
3864 static int raid_modcmd_fini(void);
3865
3866 static int
3867 raid_modcmd(modcmd_t cmd, void *data)
3868 {
3869 int error;
3870
3871 error = 0;
3872 switch (cmd) {
3873 case MODULE_CMD_INIT:
3874 error = raid_modcmd_init();
3875 break;
3876 case MODULE_CMD_FINI:
3877 error = raid_modcmd_fini();
3878 break;
3879 default:
3880 error = ENOTTY;
3881 break;
3882 }
3883 return error;
3884 }
3885
/*
 * Module initialization: set up the global lock, attach the device
 * switch entries and the autoconf driver/attachment, boot the
 * RAIDframe core, and register a finalizer that will auto-configure
 * RAID sets once all real hardware has been found.  Each failure path
 * unwinds whatever was attached before it.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST means the devsw is already attached (built-in case). */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* undo the devsw attach done above */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* undo everything attached so far, in reverse order */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here; all failures returned above. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		/* non-fatal: the module still works without autoconfig */
		error = 0;
	}

	return error;
}
3956
/*
 * Module finalization: refuse to unload while any raid device exists,
 * then detach the autoconf attachment, driver, and devsw in reverse
 * order of raid_modcmd_init(), re-attaching on partial failure so the
 * module is left in a consistent state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: re-attach what we already detached */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back: re-attach what we already detached */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4006