/* $NetBSD: rf_netbsdkintf.c,v 1.356.2.2 2018/09/09 22:12:16 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.356.2.2 2018/09/09 22:12:16 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131 #include <sys/compat_stub.h>
132
133 #include <prop/proplib.h>
134
135 #include <dev/raidframe/raidframevar.h>
136 #include <dev/raidframe/raidframeio.h>
137 #include <dev/raidframe/rf_paritymap.h>
138
139 #include "rf_raid.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_desc.h"
144 #include "rf_diskqueue.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_threadstuff.h"
152
153 #include "rf_compat50.h"
154
155 #include "rf_compat80.h"
156
157 #ifdef COMPAT_NETBSD32
158 #include "rf_compat32.h"
159 #endif
160
161 #include "ioconf.h"
162
163 #ifdef DEBUG
164 int rf_kdebug_level = 0;
165 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
166 #else /* DEBUG */
167 #define db1_printf(a) { }
168 #endif /* DEBUG */
169
170 #ifdef DEBUG_ROOT
171 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
172 #else
173 #define DPRINTF(a, ...)
174 #endif
175
176 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
177 static rf_declare_mutex2(rf_sparet_wait_mutex);
178 static rf_declare_cond2(rf_sparet_wait_cv);
179 static rf_declare_cond2(rf_sparet_resp_cv);
180
181 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
182 * spare table */
183 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
184 * installation process */
185 #endif
186
187 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
188
189 /* prototypes */
190 static void KernelWakeupFunc(struct buf *);
191 static void InitBP(struct buf *, struct vnode *, unsigned,
192 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
193 void *, int, struct proc *);
194 struct raid_softc;
195 static void raidinit(struct raid_softc *);
196 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
197 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
198
199 static int raid_match(device_t, cfdata_t, void *);
200 static void raid_attach(device_t, device_t, void *);
201 static int raid_detach(device_t, int);
202
203 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
204 daddr_t, daddr_t);
205 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
206 daddr_t, daddr_t, int);
207
208 static int raidwrite_component_label(unsigned,
209 dev_t, struct vnode *, RF_ComponentLabel_t *);
210 static int raidread_component_label(unsigned,
211 dev_t, struct vnode *, RF_ComponentLabel_t *);
212
213 static int raid_diskstart(device_t, struct buf *bp);
214 static int raid_dumpblocks(device_t, void *, daddr_t, int);
215 static int raid_lastclose(device_t);
216
217 static dev_type_open(raidopen);
218 static dev_type_close(raidclose);
219 static dev_type_read(raidread);
220 static dev_type_write(raidwrite);
221 static dev_type_ioctl(raidioctl);
222 static dev_type_strategy(raidstrategy);
223 static dev_type_dump(raiddump);
224 static dev_type_size(raidsize);
225
/* Block-device entry points for /dev/raidN[a-...] */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character-device (raw) entry points; I/O goes through physio. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the generic dk(9) disk framework. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
261
/*
 * Per-unit soft state for a RAIDframe device.  Units live on the
 * global "raids" list (sc_link) and are looked up by unit number.
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk framework state */
	int     sc_unit;		/* raidN unit number */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* the RAIDframe descriptor proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
274 /* sc_flags */
275 #define RAIDF_INITED 0x01 /* unit has been initialized */
276 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */
277 #define RAIDF_DETACH 0x04 /* detach after final close */
278 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */
279 #define RAIDF_LOCKED 0x10 /* unit is locked */
280 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */
281
282 #define raidunit(x) DISKUNIT(x)
283 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
284
285 extern struct cfdriver raid_cd;
286 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
287 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
288 DVF_DETACH_SHUTDOWN);
289
290 /* Internal representation of a rf_recon_req */
291 struct rf_recon_req_internal {
292 RF_RowCol_t col;
293 RF_ReconReqFlags_t flags;
294 void *raidPtr;
295 };
296
297 /*
298 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
299 * Be aware that large numbers can allow the driver to consume a lot of
300 * kernel memory, especially on writes, and in degraded mode reads.
301 *
302 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
303 * a single 64K write will typically require 64K for the old data,
304 * 64K for the old parity, and 64K for the new parity, for a total
305 * of 192K (if the parity buffer is not re-used immediately).
306 * Even it if is used immediately, that's still 128K, which when multiplied
307 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
308 *
309 * Now in degraded mode, for example, a 64K read on the above setup may
310 * require data reconstruction, which will require *all* of the 4 remaining
311 * disks to participate -- 4 * 32K/disk == 128K again.
312 */
313
314 #ifndef RAIDOUTSTANDING
315 #define RAIDOUTSTANDING 6
316 #endif
317
318 #define RAIDLABELDEV(dev) \
319 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
320
321 /* declared here, and made public, for the benefit of KVM stuff.. */
322
323 static int raidlock(struct raid_softc *);
324 static void raidunlock(struct raid_softc *);
325
326 static int raid_detach_unlocked(struct raid_softc *);
327
328 static void rf_markalldirty(RF_Raid_t *);
329 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
330
331 void rf_ReconThread(struct rf_recon_req_internal *);
332 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
333 void rf_CopybackThread(RF_Raid_t *raidPtr);
334 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
335 int rf_autoconfig(device_t);
336 void rf_buildroothack(RF_ConfigSet_t *);
337
338 RF_AutoConfig_t *rf_find_raid_components(void);
339 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
340 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
341 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
342 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
343 int rf_set_autoconfig(RF_Raid_t *, int);
344 int rf_set_rootpartition(RF_Raid_t *, int);
345 void rf_release_all_vps(RF_ConfigSet_t *);
346 void rf_cleanup_config_set(RF_ConfigSet_t *);
347 int rf_have_enough_components(RF_ConfigSet_t *);
348 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
349 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
350
351 /*
352 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
353 * Note that this is overridden by having RAID_AUTOCONFIG as an option
354 * in the kernel config file.
355 */
356 #ifdef RAID_AUTOCONFIG
357 int raidautoconfig = 1;
358 #else
359 int raidautoconfig = 0;
360 #endif
361 static bool raidautoconfigdone = false;
362
363 struct RF_Pools_s rf_pools;
364
365 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
366 static kmutex_t raid_lock;
367
368 static struct raid_softc *
369 raidcreate(int unit) {
370 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
371 sc->sc_unit = unit;
372 cv_init(&sc->sc_cv, "raidunit");
373 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
374 return sc;
375 }
376
377 static void
378 raiddestroy(struct raid_softc *sc) {
379 cv_destroy(&sc->sc_cv);
380 mutex_destroy(&sc->sc_mutex);
381 kmem_free(sc, sizeof(*sc));
382 }
383
384 static struct raid_softc *
385 raidget(int unit, bool create) {
386 struct raid_softc *sc;
387 if (unit < 0) {
388 #ifdef DIAGNOSTIC
389 panic("%s: unit %d!", __func__, unit);
390 #endif
391 return NULL;
392 }
393 mutex_enter(&raid_lock);
394 LIST_FOREACH(sc, &raids, sc_link) {
395 if (sc->sc_unit == unit) {
396 mutex_exit(&raid_lock);
397 return sc;
398 }
399 }
400 mutex_exit(&raid_lock);
401 if (!create)
402 return NULL;
403 if ((sc = raidcreate(unit)) == NULL)
404 return NULL;
405 mutex_enter(&raid_lock);
406 LIST_INSERT_HEAD(&raids, sc, sc_link);
407 mutex_exit(&raid_lock);
408 return sc;
409 }
410
/*
 * Unlink sc from the global unit list and destroy it.  The caller
 * must ensure no other references to the unit remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
418
/*
 * Legacy pseudo-device attach entry point.  Intentionally a no-op.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
428
429 int
430 rf_autoconfig(device_t self)
431 {
432 RF_AutoConfig_t *ac_list;
433 RF_ConfigSet_t *config_sets;
434
435 if (!raidautoconfig || raidautoconfigdone == true)
436 return (0);
437
438 /* XXX This code can only be run once. */
439 raidautoconfigdone = true;
440
441 #ifdef __HAVE_CPU_BOOTCONF
442 /*
443 * 0. find the boot device if needed first so we can use it later
444 * this needs to be done before we autoconfigure any raid sets,
445 * because if we use wedges we are not going to be able to open
446 * the boot device later
447 */
448 if (booted_device == NULL)
449 cpu_bootconf();
450 #endif
451 /* 1. locate all RAID components on the system */
452 aprint_debug("Searching for RAID components...\n");
453 ac_list = rf_find_raid_components();
454
455 /* 2. Sort them into their respective sets. */
456 config_sets = rf_create_auto_sets(ac_list);
457
458 /*
459 * 3. Evaluate each set and configure the valid ones.
460 * This gets done in rf_buildroothack().
461 */
462 rf_buildroothack(config_sets);
463
464 return 1;
465 }
466
/*
 * Return non-zero if RAID set *r contains the boot device bdv as one
 * of its components.  Comparison is by device name; components that
 * are wedges ("dkN") are translated to their parent disk's name first.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the "/dev/" prefix stored in the component name */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge component: compare against its parent disk */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		/*
		 * NOTE(review): this is a prefix match, so a boot device
		 * named "wd1" would also match a component on "wd10" --
		 * confirm whether exact-name matching is intended here.
		 */
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
490
491 void
492 rf_buildroothack(RF_ConfigSet_t *config_sets)
493 {
494 RF_ConfigSet_t *cset;
495 RF_ConfigSet_t *next_cset;
496 int num_root;
497 struct raid_softc *sc, *rsc;
498 struct dk_softc *dksc;
499
500 sc = rsc = NULL;
501 num_root = 0;
502 cset = config_sets;
503 while (cset != NULL) {
504 next_cset = cset->next;
505 if (rf_have_enough_components(cset) &&
506 cset->ac->clabel->autoconfigure == 1) {
507 sc = rf_auto_config_set(cset);
508 if (sc != NULL) {
509 aprint_debug("raid%d: configured ok\n",
510 sc->sc_unit);
511 if (cset->rootable) {
512 rsc = sc;
513 num_root++;
514 }
515 } else {
516 /* The autoconfig didn't work :( */
517 aprint_debug("Autoconfig failed\n");
518 rf_release_all_vps(cset);
519 }
520 } else {
521 /* we're not autoconfiguring this set...
522 release the associated resources */
523 rf_release_all_vps(cset);
524 }
525 /* cleanup */
526 rf_cleanup_config_set(cset);
527 cset = next_cset;
528 }
529 dksc = &rsc->sc_dksc;
530
531 /* if the user has specified what the root device should be
532 then we don't touch booted_device or boothowto... */
533
534 if (rootspec != NULL)
535 return;
536
537 /* we found something bootable... */
538
539 /*
540 * XXX: The following code assumes that the root raid
541 * is the first ('a') partition. This is about the best
542 * we can do with a BSD disklabel, but we might be able
543 * to do better with a GPT label, by setting a specified
544 * attribute to indicate the root partition. We can then
545 * stash the partition number in the r->root_partition
546 * high bits (the bottom 2 bits are already used). For
547 * now we just set booted_partition to 0 when we override
548 * root.
549 */
550 if (num_root == 1) {
551 device_t candidate_root;
552 if (dksc->sc_dkdev.dk_nwedges != 0) {
553 char cname[sizeof(cset->ac->devname)];
554 /* XXX: assume partition 'a' first */
555 snprintf(cname, sizeof(cname), "%s%c",
556 device_xname(dksc->sc_dev), 'a');
557 candidate_root = dkwedge_find_by_wname(cname);
558 DPRINTF("%s: candidate wedge root=%s\n", __func__,
559 cname);
560 if (candidate_root == NULL) {
561 /*
562 * If that is not found, because we don't use
563 * disklabel, return the first dk child
564 * XXX: we can skip the 'a' check above
565 * and always do this...
566 */
567 size_t i = 0;
568 candidate_root = dkwedge_find_by_parent(
569 device_xname(dksc->sc_dev), &i);
570 }
571 DPRINTF("%s: candidate wedge root=%p\n", __func__,
572 candidate_root);
573 } else
574 candidate_root = dksc->sc_dev;
575 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
576 DPRINTF("%s: booted_device=%p root_partition=%d "
577 "contains_boot=%d\n", __func__, booted_device,
578 rsc->sc_r.root_partition,
579 rf_containsboot(&rsc->sc_r, booted_device));
580 if (booted_device == NULL ||
581 rsc->sc_r.root_partition == 1 ||
582 rf_containsboot(&rsc->sc_r, booted_device)) {
583 booted_device = candidate_root;
584 booted_method = "raidframe/single";
585 booted_partition = 0; /* XXX assume 'a' */
586 }
587 } else if (num_root > 1) {
588 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
589 booted_device);
590
591 /*
592 * Maybe the MD code can help. If it cannot, then
593 * setroot() will discover that we have no
594 * booted_device and will ask the user if nothing was
595 * hardwired in the kernel config file
596 */
597 if (booted_device == NULL)
598 return;
599
600 num_root = 0;
601 mutex_enter(&raid_lock);
602 LIST_FOREACH(sc, &raids, sc_link) {
603 RF_Raid_t *r = &sc->sc_r;
604 if (r->valid == 0)
605 continue;
606
607 if (r->root_partition == 0)
608 continue;
609
610 if (rf_containsboot(r, booted_device)) {
611 num_root++;
612 rsc = sc;
613 dksc = &rsc->sc_dksc;
614 }
615 }
616 mutex_exit(&raid_lock);
617
618 if (num_root == 1) {
619 booted_device = dksc->sc_dev;
620 booted_method = "raidframe/multi";
621 booted_partition = 0; /* XXX assume 'a' */
622 } else {
623 /* we can't guess.. require the user to answer... */
624 boothowto |= RB_ASKNAME;
625 }
626 }
627 }
628
629 static int
630 raidsize(dev_t dev)
631 {
632 struct raid_softc *rs;
633 struct dk_softc *dksc;
634 unsigned int unit;
635
636 unit = raidunit(dev);
637 if ((rs = raidget(unit, false)) == NULL)
638 return -1;
639 dksc = &rs->sc_dksc;
640
641 if ((rs->sc_flags & RAIDF_INITED) == 0)
642 return -1;
643
644 return dk_size(dksc, dev);
645 }
646
647 static int
648 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
649 {
650 unsigned int unit;
651 struct raid_softc *rs;
652 struct dk_softc *dksc;
653
654 unit = raidunit(dev);
655 if ((rs = raidget(unit, false)) == NULL)
656 return ENXIO;
657 dksc = &rs->sc_dksc;
658
659 if ((rs->sc_flags & RAIDF_INITED) == 0)
660 return ENODEV;
661
662 /*
663 Note that blkno is relative to this particular partition.
664 By adding adding RF_PROTECTED_SECTORS, we get a value that
665 is relative to the partition used for the underlying component.
666 */
667 blkno += RF_PROTECTED_SECTORS;
668
669 return dk_dump(dksc, dev, blkno, va, size);
670 }
671
/*
 * Write nblk blocks of crash-dump data at va to block blkno of a
 * surviving component of the set.  Only RAID 1 (one data column, one
 * parity column) sets are supported.  Returns 0 on success, EINVAL if
 * the layout is unsupported or no live component exists, ENXIO if the
 * chosen component has no block device, or an error from raidlock()
 * or the component's d_dump routine.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	/* Prefer the first optimal (live) column found. */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Dump straight to the chosen component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
777
778 /* ARGSUSED */
779 static int
780 raidopen(dev_t dev, int flags, int fmt,
781 struct lwp *l)
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785 struct dk_softc *dksc;
786 int error = 0;
787 int part, pmask;
788
789 if ((rs = raidget(unit, true)) == NULL)
790 return ENXIO;
791 if ((error = raidlock(rs)) != 0)
792 return (error);
793
794 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
795 error = EBUSY;
796 goto bad;
797 }
798
799 dksc = &rs->sc_dksc;
800
801 part = DISKPART(dev);
802 pmask = (1 << part);
803
804 if (!DK_BUSY(dksc, pmask) &&
805 ((rs->sc_flags & RAIDF_INITED) != 0)) {
806 /* First one... mark things as dirty... Note that we *MUST*
807 have done a configure before this. I DO NOT WANT TO BE
808 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
809 THAT THEY BELONG TOGETHER!!!!! */
810 /* XXX should check to see if we're only open for reading
811 here... If so, we needn't do this, but then need some
812 other way of keeping track of what's happened.. */
813
814 rf_markalldirty(&rs->sc_r);
815 }
816
817 if ((rs->sc_flags & RAIDF_INITED) != 0)
818 error = dk_open(dksc, dev, flags, fmt, l);
819
820 bad:
821 raidunlock(rs);
822
823 return (error);
824
825
826 }
827
828 static int
829 raid_lastclose(device_t self)
830 {
831 struct raid_softc *rs = raidsoftc(self);
832
833 /* Last one... device is not unconfigured yet.
834 Device shutdown has taken care of setting the
835 clean bits if RAIDF_INITED is not set
836 mark things as clean... */
837
838 rf_update_component_labels(&rs->sc_r,
839 RF_FINAL_COMPONENT_UPDATE);
840
841 /* pass to unlocked code */
842 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
843 rs->sc_flags |= RAIDF_DETACH;
844
845 return 0;
846 }
847
/*
 * d_close entry point.  Closes via the disk framework, then -- with
 * the unit lock released -- performs any detach or teardown that the
 * close made pending (RAIDF_DETACH / RAIDF_SHUTDOWN).
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have requested a detach */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* detach/destroy must run without the unit lock held */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
887
/*
 * Signal iodone_cv (under iodone_lock) so the RAID I/O service thread
 * wakes up and processes newly queued work.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
895
896 static void
897 raidstrategy(struct buf *bp)
898 {
899 unsigned int unit;
900 struct raid_softc *rs;
901 struct dk_softc *dksc;
902 RF_Raid_t *raidPtr;
903
904 unit = raidunit(bp->b_dev);
905 if ((rs = raidget(unit, false)) == NULL) {
906 bp->b_error = ENXIO;
907 goto fail;
908 }
909 if ((rs->sc_flags & RAIDF_INITED) == 0) {
910 bp->b_error = ENXIO;
911 goto fail;
912 }
913 dksc = &rs->sc_dksc;
914 raidPtr = &rs->sc_r;
915
916 /* Queue IO only */
917 if (dk_strategy_defer(dksc, bp))
918 goto done;
919
920 /* schedule the IO to happen at the next convenient time */
921 raid_wakeup(raidPtr);
922
923 done:
924 return;
925
926 fail:
927 bp->b_resid = bp->b_bcount;
928 biodone(bp);
929 }
930
931 static int
932 raid_diskstart(device_t dev, struct buf *bp)
933 {
934 struct raid_softc *rs = raidsoftc(dev);
935 RF_Raid_t *raidPtr;
936
937 raidPtr = &rs->sc_r;
938 if (!raidPtr->valid) {
939 db1_printf(("raid is not valid..\n"));
940 return ENODEV;
941 }
942
943 /* XXX */
944 bp->b_resid = 0;
945
946 return raiddoaccess(raidPtr, bp);
947 }
948
949 void
950 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
951 {
952 struct raid_softc *rs;
953 struct dk_softc *dksc;
954
955 rs = raidPtr->softc;
956 dksc = &rs->sc_dksc;
957
958 dk_done(dksc, bp);
959
960 rf_lock_mutex2(raidPtr->mutex);
961 raidPtr->openings++;
962 rf_unlock_mutex2(raidPtr->mutex);
963
964 /* schedule more IO */
965 raid_wakeup(raidPtr);
966 }
967
968 /* ARGSUSED */
969 static int
970 raidread(dev_t dev, struct uio *uio, int flags)
971 {
972 int unit = raidunit(dev);
973 struct raid_softc *rs;
974
975 if ((rs = raidget(unit, false)) == NULL)
976 return ENXIO;
977
978 if ((rs->sc_flags & RAIDF_INITED) == 0)
979 return (ENXIO);
980
981 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
982
983 }
984
985 /* ARGSUSED */
986 static int
987 raidwrite(dev_t dev, struct uio *uio, int flags)
988 {
989 int unit = raidunit(dev);
990 struct raid_softc *rs;
991
992 if ((rs = raidget(unit, false)) == NULL)
993 return ENXIO;
994
995 if ((rs->sc_flags & RAIDF_INITED) == 0)
996 return (ENXIO);
997
998 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
999
1000 }
1001
/*
 * Tear down a configured RAID set.  Despite the name, the caller is
 * expected to hold the unit lock (see raidlock()); "unlocked" refers
 * to the raid_lock list mutex.  Refuses with EBUSY while the device
 * is open or background reconstruction/parity-rewrite/copyback is in
 * progress.  Returns 0 on success or an error from rf_Shutdown().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* refuse while the unit is open or background work is running */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* NOTE(review): RAIDF_SHUTDOWN is cleared before rf_Shutdown(),
	   presumably so a failed shutdown leaves the unit usable --
	   confirm the intent. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1039
/*
 * raidioctl: ioctl entry point for the raid(4) pseudo-device.
 *
 * Dispatches RAIDFRAME_* commands (configuration, component label
 * management, failing/rebuilding components, status queries) and, at
 * the bottom, forwards generic disk ioctls to dk_ioctl().  Compat
 * handlers are consulted first; see the protocol comment below for how
 * ENOSYS/EPASSTHROUGH/EAGAIN steer the flow.
 *
 * Returns 0 on success or an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	/* Look up the softc for this unit; don't auto-create it. */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	/*
	 * Handle compat ioctl calls
	 *
	 * * If compat code is not loaded, stub returns ENOSYS and we just
	 *   check the "native" cmd's
	 * * If compat code is loaded but does not recognize the cmd, it
	 *   returns EPASSTHROUGH, and we just check the "native" cmd's
	 * * If compat code returns EAGAIN, we need to finish via config
	 * * Otherwise the cmd has been handled and we just return
	 */
	retcode = (*raidframe50_ioctl)(cmd, (rs->sc_flags & RAIDF_INITED),
	    raidPtr, unit, data, &k_cfg);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;
	else if (retcode != EPASSTHROUGH)
		return retcode;

	retcode = (*raidframe80_ioctl)(cmd, (rs->sc_flags & RAIDF_INITED),
	    raidPtr, unit, data, &k_cfg);
	if (retcode == ENOSYS)
		retcode = 0;
	else if (retcode == EAGAIN)
		goto config;
	else if (retcode != EPASSTHROUGH)
		return retcode;

	/*
	 * XXX
	 * Handling of FAIL_DISK80 command requires us to retain retcode's
	 * value of EPASSTHROUGH.  If you add more compat code later, make
	 * sure you don't overwrite retcode and break this!
	 */

	switch (cmd) {

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_CONFIGURE32:
#endif
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef COMPAT_NETBSD32
#ifdef _LP64
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			retcode = rf_config_netbsd32(data, k_cfg);
		else
#endif
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/*
		 * Common configuration path; also entered from the compat
		 * handlers above (EAGAIN), which supply k_cfg themselves.
		 */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse while the device is open or any background
		 * operation (recon/parity rewrite/copyback) is running. */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		/* NOTE(review): retcode is 0 or EPASSTHROUGH here,
		 * depending on whether a compat module was loaded --
		 * verify that callers tolerate EPASSTHROUGH. */
		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* Rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): not implemented -- returns the prevailing
		 * retcode (0 or EPASSTHROUGH) without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the target component's state under the
		 * RAID mutex before kicking off the rebuild thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* Ownership of rrint passes to the rebuild thread. */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
#ifdef COMPAT_NETBSD32
#ifdef _LP64
			if (cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK80:
		/* Check if we called compat code for this cmd */
		if (retcode != EPASSTHROUGH)
			return EINVAL;
		/* FALLTHRU */
	case RAIDFRAME_FAIL_DISK:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		 * here (we return 0 unconditionally), unlike the
		 * REBUILD_IN_PLACE case above which returns it; rrint would
		 * also leak if thread creation failed.  Verify intent. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* Only allowed when every component is healthy. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	/* Generic disk ioctls require a configured, valid array. */
	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1782
1783
1784 /* raidinit -- complete the rest of the initialization for the
1785 RAIDframe device. */
1786
1787
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed: free the cfdata and leave the unit
		 * without RAIDF_INITED set (no error is propagated). */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* FCFS queue; sorting is done by the underlying components. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usuable */
	rs->sc_flags |= RAIDF_INITED;

	/* Scan for wedges (GPT partitions etc.) on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1843
1844 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1845 /* wake up the daemon & tell it to get us a spare table
1846 * XXX
1847 * the entries in the queues should be tagged with the raidPtr
1848 * so that in the extremely rare case that two recons happen at once,
1849 * we know for which device were requesting a spare table
1850 * XXX
1851 *
1852 * XXX This code is not currently used. GO
1853 */
1854 int
1855 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1856 {
1857 int retcode;
1858
1859 rf_lock_mutex2(rf_sparet_wait_mutex);
1860 req->next = rf_sparet_wait_queue;
1861 rf_sparet_wait_queue = req;
1862 rf_broadcast_cond2(rf_sparet_wait_cv);
1863
1864 /* mpsleep unlocks the mutex */
1865 while (!rf_sparet_resp_queue) {
1866 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1867 }
1868 req = rf_sparet_resp_queue;
1869 rf_sparet_resp_queue = req->next;
1870 rf_unlock_mutex2(rf_sparet_wait_mutex);
1871
1872 retcode = req->fcol;
1873 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1874 * alloc'd */
1875 return (retcode);
1876 }
1877 #endif
1878
1879 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1880 * bp & passes it down.
1881 * any calls originating in the kernel must use non-blocking I/O
1882 * do some extra sanity checking to return "appropriate" error values for
1883 * certain conditions (to make some standard utilities work)
1884 *
1885 * Formerly known as: rf_DoAccessKernel
1886 */
1887 void
1888 raidstart(RF_Raid_t *raidPtr)
1889 {
1890 struct raid_softc *rs;
1891 struct dk_softc *dksc;
1892
1893 rs = raidPtr->softc;
1894 dksc = &rs->sc_dksc;
1895 /* quick check to see if anything has died recently */
1896 rf_lock_mutex2(raidPtr->mutex);
1897 if (raidPtr->numNewFailures > 0) {
1898 rf_unlock_mutex2(raidPtr->mutex);
1899 rf_update_component_labels(raidPtr,
1900 RF_NORMAL_COMPONENT_UPDATE);
1901 rf_lock_mutex2(raidPtr->mutex);
1902 raidPtr->numNewFailures--;
1903 }
1904 rf_unlock_mutex2(raidPtr->mutex);
1905
1906 if ((rs->sc_flags & RAIDF_INITED) == 0) {
1907 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1908 return;
1909 }
1910
1911 dk_start(dksc, NULL);
1912 }
1913
/*
 * Translate one buf into a RAIDframe access and dispatch it.
 * Returns EAGAIN when the array is out of "openings" (caller retries
 * later), ENOSPC for out-of-range or unaligned requests, otherwise the
 * result of rf_DoAccess().  All kernel-originated I/O is non-blocking.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* No openings left: tell the caller to come back later.
	 * NOTE(review): the check and the later decrement are separate
	 * lock acquisitions -- presumably single-threaded via dk_start;
	 * confirm before relying on it. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* Whole sectors spanned by the request; pb flags a partial
	 * trailing sector. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {	/* XXX "1 ||" leaves this on */
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Range check; the "sum < x" comparisons catch wraparound. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject requests that are not a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume one opening; returned by the I/O completion path. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1986
1987 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1988
1989 int
1990 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1991 {
1992 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1993 struct buf *bp;
1994
1995 req->queue = queue;
1996 bp = req->bp;
1997
1998 switch (req->type) {
1999 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2000 /* XXX need to do something extra here.. */
2001 /* I'm leaving this in, as I've never actually seen it used,
2002 * and I'd like folks to report it... GO */
2003 printf(("WAKEUP CALLED\n"));
2004 queue->numOutstanding++;
2005
2006 bp->b_flags = 0;
2007 bp->b_private = req;
2008
2009 KernelWakeupFunc(bp);
2010 break;
2011
2012 case RF_IO_TYPE_READ:
2013 case RF_IO_TYPE_WRITE:
2014 #if RF_ACC_TRACE > 0
2015 if (req->tracerec) {
2016 RF_ETIMER_START(req->tracerec->timer);
2017 }
2018 #endif
2019 InitBP(bp, queue->rf_cinfo->ci_vp,
2020 op, queue->rf_cinfo->ci_dev,
2021 req->sectorOffset, req->numSector,
2022 req->buf, KernelWakeupFunc, (void *) req,
2023 queue->raidPtr->logBytesPerSector, req->b_proc);
2024
2025 if (rf_debugKernelAccess) {
2026 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2027 (long) bp->b_blkno));
2028 }
2029 queue->numOutstanding++;
2030 queue->last_deq_sector = req->sectorOffset;
2031 /* acc wouldn't have been let in if there were any pending
2032 * reqs at any other priority */
2033 queue->curPriority = req->priority;
2034
2035 db1_printf(("Going for %c to unit %d col %d\n",
2036 req->type, queue->raidPtr->raidid,
2037 queue->col));
2038 db1_printf(("sector %d count %d (%d bytes) %d\n",
2039 (int) req->sectorOffset, (int) req->numSector,
2040 (int) (req->numSector <<
2041 queue->raidPtr->logBytesPerSector),
2042 (int) queue->raidPtr->logBytesPerSector));
2043
2044 /*
2045 * XXX: drop lock here since this can block at
2046 * least with backing SCSI devices. Retake it
2047 * to minimize fuss with calling interfaces.
2048 */
2049
2050 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2051 bdev_strategy(bp);
2052 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2053 break;
2054
2055 default:
2056 panic("bad req->type in rf_DispatchKernelIO");
2057 }
2058 db1_printf(("Exiting from DispatchKernelIO\n"));
2059
2060 return (0);
2061 }
2062 /* this is the callback function associated with a I/O invoked from
2063 kernel code.
2064 */
/*
 * I/O completion callback (b_iodone) for component buffers dispatched
 * by rf_DispatchKernelIO().  On error, marks the component failed if
 * the set can tolerate it, then hands the request to the raidio thread
 * via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/the NOP path. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update in
			 * raidstart() on the next I/O. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2131
2132
2133 /*
2134 * initialize a buf structure for doing an I/O in the kernel.
2135 */
2136 static void
2137 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2138 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2139 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2140 struct proc *b_proc)
2141 {
2142 /* bp->b_flags = B_PHYS | rw_flag; */
2143 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2144 bp->b_oflags = 0;
2145 bp->b_cflags = 0;
2146 bp->b_bcount = numSect << logBytesPerSector;
2147 bp->b_bufsize = bp->b_bcount;
2148 bp->b_error = 0;
2149 bp->b_dev = dev;
2150 bp->b_data = bf;
2151 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2152 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2153 if (bp->b_bcount == 0) {
2154 panic("bp->b_bcount is zero in InitBP!!");
2155 }
2156 bp->b_proc = b_proc;
2157 bp->b_iodone = cbFunc;
2158 bp->b_private = cbArg;
2159 }
2160
2161 /*
2162 * Wait interruptibly for an exclusive lock.
2163 *
2164 * XXX
2165 * Several drivers do this; it should be abstracted and made MP-safe.
2166 * (Hmm... where have we seen this warning before :-> GO )
2167 */
2168 static int
2169 raidlock(struct raid_softc *rs)
2170 {
2171 int error;
2172
2173 error = 0;
2174 mutex_enter(&rs->sc_mutex);
2175 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2176 rs->sc_flags |= RAIDF_WANTED;
2177 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2178 if (error != 0)
2179 goto done;
2180 }
2181 rs->sc_flags |= RAIDF_LOCKED;
2182 done:
2183 mutex_exit(&rs->sc_mutex);
2184 return (error);
2185 }
2186 /*
2187 * Unlock and wake up any waiters.
2188 */
2189 static void
2190 raidunlock(struct raid_softc *rs)
2191 {
2192
2193 mutex_enter(&rs->sc_mutex);
2194 rs->sc_flags &= ~RAIDF_LOCKED;
2195 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2196 rs->sc_flags &= ~RAIDF_WANTED;
2197 cv_broadcast(&rs->sc_cv);
2198 }
2199 mutex_exit(&rs->sc_mutex);
2200 }
2201
2202
2203 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2204 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2205 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2206
/*
 * Byte offset of the component-info (label) area from the start of a
 * component; fixed at RF_COMPONENT_INFO_OFFSET.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2213
2214 static daddr_t
2215 rf_component_info_size(unsigned secsize)
2216 {
2217 daddr_t info_size;
2218
2219 KASSERT(secsize);
2220 if (secsize > RF_COMPONENT_INFO_SIZE)
2221 info_size = secsize;
2222 else
2223 info_size = RF_COMPONENT_INFO_SIZE;
2224
2225 return info_size;
2226 }
2227
2228 static daddr_t
2229 rf_parity_map_offset(RF_Raid_t *raidPtr)
2230 {
2231 daddr_t map_offset;
2232
2233 KASSERT(raidPtr->bytesPerSector);
2234 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2235 map_offset = raidPtr->bytesPerSector;
2236 else
2237 map_offset = RF_COMPONENT_INFO_SIZE;
2238 map_offset += rf_component_info_offset();
2239
2240 return map_offset;
2241 }
2242
2243 static daddr_t
2244 rf_parity_map_size(RF_Raid_t *raidPtr)
2245 {
2246 daddr_t map_size;
2247
2248 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2249 map_size = raidPtr->bytesPerSector;
2250 else
2251 map_size = RF_PARITY_MAP_SIZE;
2252
2253 return map_size;
2254 }
2255
2256 int
2257 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2258 {
2259 RF_ComponentLabel_t *clabel;
2260
2261 clabel = raidget_component_label(raidPtr, col);
2262 clabel->clean = RF_RAID_CLEAN;
2263 raidflush_component_label(raidPtr, col);
2264 return(0);
2265 }
2266
2267
2268 int
2269 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2270 {
2271 RF_ComponentLabel_t *clabel;
2272
2273 clabel = raidget_component_label(raidPtr, col);
2274 clabel->clean = RF_RAID_DIRTY;
2275 raidflush_component_label(raidPtr, col);
2276 return(0);
2277 }
2278
/*
 * Read the on-disk component label for column `col' into the in-core
 * copy kept in raid_cinfo[col].ci_label.  Returns the error from the
 * underlying read, 0 on success.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2288
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers may modify it in place; use raidflush_component_label() to
 * push changes to disk.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2294
2295 int
2296 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2297 {
2298 RF_ComponentLabel_t *label;
2299
2300 label = &raidPtr->raid_cinfo[col].ci_label;
2301 label->mod_counter = raidPtr->mod_counter;
2302 #ifndef RF_NO_PARITY_MAP
2303 label->parity_map_modcount = label->mod_counter;
2304 #endif
2305 return raidwrite_component_label(raidPtr->bytesPerSector,
2306 raidPtr->Disks[col].dev,
2307 raidPtr->raid_cinfo[col].ci_vp, label);
2308 }
2309
2310
/*
 * Read the component label stored in the reserved component-info area
 * of `dev'/`b_vp' into `clabel'.  `secsize' sizes the on-disk area
 * (see rf_component_info_size()).
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2320
2321 /* ARGSUSED */
2322 static int
2323 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2324 size_t msize, daddr_t offset, daddr_t dsize)
2325 {
2326 struct buf *bp;
2327 int error;
2328
2329 /* XXX should probably ensure that we don't try to do this if
2330 someone has changed rf_protected_sectors. */
2331
2332 if (b_vp == NULL) {
2333 /* For whatever reason, this component is not valid.
2334 Don't try to read a component label from it. */
2335 return(EINVAL);
2336 }
2337
2338 /* get a block of the appropriate size... */
2339 bp = geteblk((int)dsize);
2340 bp->b_dev = dev;
2341
2342 /* get our ducks in a row for the read */
2343 bp->b_blkno = offset / DEV_BSIZE;
2344 bp->b_bcount = dsize;
2345 bp->b_flags |= B_READ;
2346 bp->b_resid = dsize;
2347
2348 bdev_strategy(bp);
2349 error = biowait(bp);
2350
2351 if (!error) {
2352 memcpy(data, bp->b_data, msize);
2353 }
2354
2355 brelse(bp, 0);
2356 return(error);
2357 }
2358
2359
/*
 * Write `clabel' into the reserved component-info area of
 * `dev'/`b_vp', synchronously.  `secsize' sizes the on-disk area
 * (see rf_component_info_size()).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2369
/*
 * Write `msize' bytes from `data' into the reserved area of `dsize'
 * bytes at byte `offset' on `dev'; the remainder of the area is
 * zero-filled.  If `asyncp' is set the write is issued B_ASYNC and 0
 * is returned immediately without waiting.
 *
 * NOTE(review): in the async case this returns without brelse();
 * presumably the buffer is released at biodone time for B_ASYNC
 * buffers -- confirm against the buffercache(9) contract.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* zero-fill the whole area, then drop the payload at the front */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2404
2405 void
2406 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2407 {
2408 int c;
2409
2410 for (c = 0; c < raidPtr->numCol; c++) {
2411 /* Skip dead disks. */
2412 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2413 continue;
2414 /* XXXjld: what if an error occurs here? */
2415 raidwrite_component_area(raidPtr->Disks[c].dev,
2416 raidPtr->raid_cinfo[c].ci_vp, map,
2417 RF_PARITYMAP_NBYTE,
2418 rf_parity_map_offset(raidPtr),
2419 rf_parity_map_size(raidPtr), 0);
2420 }
2421 }
2422
2423 void
2424 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2425 {
2426 struct rf_paritymap_ondisk tmp;
2427 int c,first;
2428
2429 first=1;
2430 for (c = 0; c < raidPtr->numCol; c++) {
2431 /* Skip dead disks. */
2432 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2433 continue;
2434 raidread_component_area(raidPtr->Disks[c].dev,
2435 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2436 RF_PARITYMAP_NBYTE,
2437 rf_parity_map_offset(raidPtr),
2438 rf_parity_map_size(raidPtr));
2439 if (first) {
2440 memcpy(map, &tmp, sizeof(*map));
2441 first = 0;
2442 } else {
2443 rf_paritymap_merge(map, &tmp);
2444 }
2445 }
2446 }
2447
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component (and every in-use spare) dirty.
 * Presumably a label still dirty at configure time indicates an
 * unclean shutdown -- see raidmarkclean() for the other half.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol keeps its prior value (-1 initially) and is
			   written into the label below -- presumably cannot
			   happen for an rf_ds_used_spare; verify. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2507
2508
/*
 * Write freshly stamped component labels (with a bumped mod_counter)
 * to every optimal component and every in-use spare.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2586
2587 void
2588 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2589 {
2590
2591 if (vp != NULL) {
2592 if (auto_configured == 1) {
2593 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2594 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2595 vput(vp);
2596
2597 } else {
2598 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2599 }
2600 }
2601 }
2602
2603
2604 void
2605 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2606 {
2607 int r,c;
2608 struct vnode *vp;
2609 int acd;
2610
2611
2612 /* We take this opportunity to close the vnodes like we should.. */
2613
2614 for (c = 0; c < raidPtr->numCol; c++) {
2615 vp = raidPtr->raid_cinfo[c].ci_vp;
2616 acd = raidPtr->Disks[c].auto_configured;
2617 rf_close_component(raidPtr, vp, acd);
2618 raidPtr->raid_cinfo[c].ci_vp = NULL;
2619 raidPtr->Disks[c].auto_configured = 0;
2620 }
2621
2622 for (r = 0; r < raidPtr->numSpare; r++) {
2623 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2624 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2625 rf_close_component(raidPtr, vp, acd);
2626 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2627 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2628 }
2629 }
2630
2631
/*
 * Kernel thread body: mark the component given by req->col as failed
 * and, if RF_FDFLAGS_RECON is set in req->flags, reconstruct it to a
 * spare.  Consumes (frees) `req' and exits the thread when finished.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* block block-I/O interrupts for the duration */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2653
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * set is flagged parity-clean; on failure the error is logged and the
 * parity status is left as it was.  Wakes anyone sleeping on
 * parity_rewrite_in_progress (shutdown path) before exiting.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2684
2685
/*
 * Kernel thread body: run rf_CopybackReconstructedData() for the set,
 * maintaining copyback_in_progress around the operation, then exit
 * the thread.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2700
2701
/*
 * Kernel thread body: reconstruct component req->col in place (onto
 * the same disk slot), maintaining recon_in_progress around the
 * operation.  Consumes (frees) `req' and exits the thread when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2719
/*
 * Try to read a RAIDframe component label from `vp' (a `secsize'-byte
 * sector device of `numsecs' sectors).  If a plausible label is found
 * and its recorded partition size fits within `size', allocate an
 * RF_AutoConfig_t and prepend it to `ac_list'.  Otherwise the vnode is
 * closed and released.  On out-of-memory the whole ac_list is torn
 * down and NULL is returned.
 *
 * NOTE(review): the out-of-memory path does not close/release `vp';
 * looks like a leak on an already-fatal path -- confirm.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* free every autoconfig entry collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL;		/* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label rejected or unreadable, so release
		   everything we were handed */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2777
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t entries for every plausible
 * label found (NULL if none).  Wedges are scanned before whole disks
 * so a wedge is preferred over the raw partition that contains it.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* only wedges typed as raidframe are candidates */
				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			/* check every FS_RAID partition in the disklabel */
			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2981
2982
2983 int
2984 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2985 {
2986
2987 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2988 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2989 ((clabel->clean == RF_RAID_CLEAN) ||
2990 (clabel->clean == RF_RAID_DIRTY)) &&
2991 clabel->row >=0 &&
2992 clabel->column >= 0 &&
2993 clabel->num_rows > 0 &&
2994 clabel->num_columns > 0 &&
2995 clabel->row < clabel->num_rows &&
2996 clabel->column < clabel->num_columns &&
2997 clabel->blockSize > 0 &&
2998 /*
2999 * numBlocksHi may contain garbage, but it is ok since
3000 * the type is unsigned. If it is really garbage,
3001 * rf_fix_old_label_size() will fix it.
3002 */
3003 rf_component_label_numblocks(clabel) > 0) {
3004 /*
3005 * label looks reasonable enough...
3006 * let's make sure it has no old garbage.
3007 */
3008 if (numsecs)
3009 rf_fix_old_label_size(clabel, numsecs);
3010 return(1);
3011 }
3012 return(0);
3013 }
3014
3015
3016 /*
3017 * For reasons yet unknown, some old component labels have garbage in
3018 * the newer numBlocksHi region, and this causes lossage. Since those
3019 * disks will also have numsecs set to less than 32 bits of sectors,
3020 * we can determine when this corruption has occurred, and fix it.
3021 *
3022 * The exact same problem, with the same unknown reason, happens to
3023 * the partitionSizeHi member as well.
3024 */
3025 static void
3026 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3027 {
3028
3029 if (numsecs < ((uint64_t)1 << 32)) {
3030 if (clabel->numBlocksHi) {
3031 printf("WARNING: total sectors < 32 bits, yet "
3032 "numBlocksHi set\n"
3033 "WARNING: resetting numBlocksHi to zero.\n");
3034 clabel->numBlocksHi = 0;
3035 }
3036
3037 if (clabel->partitionSizeHi) {
3038 printf("WARNING: total sectors < 32 bits, yet "
3039 "partitionSizeHi set\n"
3040 "WARNING: resetting partitionSizeHi to zero.\n");
3041 clabel->partitionSizeHi = 0;
3042 }
3043 }
3044 }
3045
3046
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.  Debug
 * builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* names for the root_partition field, indexed by its low 2 bits */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3080
3081 RF_ConfigSet_t *
3082 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3083 {
3084 RF_AutoConfig_t *ac;
3085 RF_ConfigSet_t *config_sets;
3086 RF_ConfigSet_t *cset;
3087 RF_AutoConfig_t *ac_next;
3088
3089
3090 config_sets = NULL;
3091
3092 /* Go through the AutoConfig list, and figure out which components
3093 belong to what sets. */
3094 ac = ac_list;
3095 while(ac!=NULL) {
3096 /* we're going to putz with ac->next, so save it here
3097 for use at the end of the loop */
3098 ac_next = ac->next;
3099
3100 if (config_sets == NULL) {
3101 /* will need at least this one... */
3102 config_sets = (RF_ConfigSet_t *)
3103 malloc(sizeof(RF_ConfigSet_t),
3104 M_RAIDFRAME, M_NOWAIT);
3105 if (config_sets == NULL) {
3106 panic("rf_create_auto_sets: No memory!");
3107 }
3108 /* this one is easy :) */
3109 config_sets->ac = ac;
3110 config_sets->next = NULL;
3111 config_sets->rootable = 0;
3112 ac->next = NULL;
3113 } else {
3114 /* which set does this component fit into? */
3115 cset = config_sets;
3116 while(cset!=NULL) {
3117 if (rf_does_it_fit(cset, ac)) {
3118 /* looks like it matches... */
3119 ac->next = cset->ac;
3120 cset->ac = ac;
3121 break;
3122 }
3123 cset = cset->next;
3124 }
3125 if (cset==NULL) {
3126 /* didn't find a match above... new set..*/
3127 cset = (RF_ConfigSet_t *)
3128 malloc(sizeof(RF_ConfigSet_t),
3129 M_RAIDFRAME, M_NOWAIT);
3130 if (cset == NULL) {
3131 panic("rf_create_auto_sets: No memory!");
3132 }
3133 cset->ac = ac;
3134 ac->next = NULL;
3135 cset->next = config_sets;
3136 cset->rootable = 0;
3137 config_sets = cset;
3138 }
3139 }
3140 ac = ac_next;
3141 }
3142
3143
3144 return(config_sets);
3145 }
3146
3147 static int
3148 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3149 {
3150 RF_ComponentLabel_t *clabel1, *clabel2;
3151
3152 /* If this one matches the *first* one in the set, that's good
3153 enough, since the other members of the set would have been
3154 through here too... */
3155 /* note that we are not checking partitionSize here..
3156
3157 Note that we are also not checking the mod_counters here.
3158 If everything else matches except the mod_counter, that's
3159 good enough for this test. We will deal with the mod_counters
3160 a little later in the autoconfiguration process.
3161
3162 (clabel1->mod_counter == clabel2->mod_counter) &&
3163
3164 The reason we don't check for this is that failed disks
3165 will have lower modification counts. If those disks are
3166 not added to the set they used to belong to, then they will
3167 form their own set, which may result in 2 different sets,
3168 for example, competing to be configured at raid0, and
3169 perhaps competing to be the root filesystem set. If the
3170 wrong ones get configured, or both attempt to become /,
3171 weird behaviour and or serious lossage will occur. Thus we
3172 need to bring them into the fold here, and kick them out at
3173 a later point.
3174
3175 */
3176
3177 clabel1 = cset->ac->clabel;
3178 clabel2 = ac->clabel;
3179 if ((clabel1->version == clabel2->version) &&
3180 (clabel1->serial_number == clabel2->serial_number) &&
3181 (clabel1->num_rows == clabel2->num_rows) &&
3182 (clabel1->num_columns == clabel2->num_columns) &&
3183 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3184 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3185 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3186 (clabel1->parityConfig == clabel2->parityConfig) &&
3187 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3188 (clabel1->blockSize == clabel2->blockSize) &&
3189 rf_component_label_numblocks(clabel1) ==
3190 rf_component_label_numblocks(clabel2) &&
3191 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3192 (clabel1->root_partition == clabel2->root_partition) &&
3193 (clabel1->last_unit == clabel2->last_unit) &&
3194 (clabel1->config_order == clabel2->config_order)) {
3195 /* if it get's here, it almost *has* to be a match */
3196 } else {
3197 /* it's not consistent with somebody in the set..
3198 punt */
3199 return(0);
3200 }
3201 /* all was fine.. it must fit... */
3202 return(1);
3203 }
3204
/*
 * Decide whether config set `cset' has enough live components to be
 * configured.  A component only counts as present if its mod_counter
 * matches the newest mod_counter seen in the set.  RAID 1 gets
 * special treatment: losing one half of a mirror pair is survivable,
 * losing both halves is not.  Returns 1 if configuration can proceed,
 * 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The newest (largest) counter in the set wins. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a component claiming column `c' with the
		   winning mod_counter */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) {	/* even component */
					even_pair_failed = 1;
				} else {	/* odd component.  If
						   we're failed, and
						   so is the even
						   component, it's
						   "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Finished the odd half of a mirror pair
			   without bailing; reset the flag for the
			   next pair. */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3307
/*
 * Build an RF_Config_t from the component labels of autoconfig set
 * `ac'.  The common geometry is taken from the first label (the set
 * members were already checked for consistency); each member's device
 * name is copied into the devnames array at its label's column.
 * `raidPtr' is not referenced here.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0;	/* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0;	/* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* no per-set debug variables */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3341
3342 int
3343 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3344 {
3345 RF_ComponentLabel_t *clabel;
3346 int column;
3347 int sparecol;
3348
3349 raidPtr->autoconfigure = new_value;
3350
3351 for(column=0; column<raidPtr->numCol; column++) {
3352 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3353 clabel = raidget_component_label(raidPtr, column);
3354 clabel->autoconfigure = new_value;
3355 raidflush_component_label(raidPtr, column);
3356 }
3357 }
3358 for(column = 0; column < raidPtr->numSpare ; column++) {
3359 sparecol = raidPtr->numCol + column;
3360 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3361 clabel = raidget_component_label(raidPtr, sparecol);
3362 clabel->autoconfigure = new_value;
3363 raidflush_component_label(raidPtr, sparecol);
3364 }
3365 }
3366 return(new_value);
3367 }
3368
3369 int
3370 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3371 {
3372 RF_ComponentLabel_t *clabel;
3373 int column;
3374 int sparecol;
3375
3376 raidPtr->root_partition = new_value;
3377 for(column=0; column<raidPtr->numCol; column++) {
3378 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3379 clabel = raidget_component_label(raidPtr, column);
3380 clabel->root_partition = new_value;
3381 raidflush_component_label(raidPtr, column);
3382 }
3383 }
3384 for(column = 0; column < raidPtr->numSpare ; column++) {
3385 sparecol = raidPtr->numCol + column;
3386 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3387 clabel = raidget_component_label(raidPtr, sparecol);
3388 clabel->root_partition = new_value;
3389 raidflush_component_label(raidPtr, sparecol);
3390 }
3391 }
3392 return(new_value);
3393 }
3394
3395 void
3396 rf_release_all_vps(RF_ConfigSet_t *cset)
3397 {
3398 RF_AutoConfig_t *ac;
3399
3400 ac = cset->ac;
3401 while(ac!=NULL) {
3402 /* Close the vp, and give it back */
3403 if (ac->vp) {
3404 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3405 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3406 vput(ac->vp);
3407 ac->vp = NULL;
3408 }
3409 ac = ac->next;
3410 }
3411 }
3412
3413
3414 void
3415 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3416 {
3417 RF_AutoConfig_t *ac;
3418 RF_AutoConfig_t *next_ac;
3419
3420 ac = cset->ac;
3421 while(ac!=NULL) {
3422 next_ac = ac->next;
3423 /* nuke the label */
3424 free(ac->clabel, M_RAIDFRAME);
3425 /* cleanup the config structure */
3426 free(ac, M_RAIDFRAME);
3427 /* "next.." */
3428 ac = next_ac;
3429 }
3430 /* and, finally, nuke the config set */
3431 free(cset, M_RAIDFRAME);
3432 }
3433
3434
3435 void
3436 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3437 {
3438 /* current version number */
3439 clabel->version = RF_COMPONENT_LABEL_VERSION;
3440 clabel->serial_number = raidPtr->serial_number;
3441 clabel->mod_counter = raidPtr->mod_counter;
3442
3443 clabel->num_rows = 1;
3444 clabel->num_columns = raidPtr->numCol;
3445 clabel->clean = RF_RAID_DIRTY; /* not clean */
3446 clabel->status = rf_ds_optimal; /* "It's good!" */
3447
3448 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3449 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3450 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3451
3452 clabel->blockSize = raidPtr->bytesPerSector;
3453 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3454
3455 /* XXX not portable */
3456 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3457 clabel->maxOutstanding = raidPtr->maxOutstanding;
3458 clabel->autoconfigure = raidPtr->autoconfigure;
3459 clabel->root_partition = raidPtr->root_partition;
3460 clabel->last_unit = raidPtr->raidid;
3461 clabel->config_order = raidPtr->config_order;
3462
3463 #ifndef RF_NO_PARITY_MAP
3464 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3465 #endif
3466 }
3467
/*
 * Bring one autoconfigured set to life: allocate a config structure,
 * find (or create) a free raid unit, configure it, and note whether
 * the set is eligible (or forced) to be the root device.
 * Returns the softc on success, NULL on failure; on configuration
 * failure the unit reference is dropped via raidput().
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Prefer the unit recorded in the label; walk forward past
	   units that are already configured (sc_r.valid != 0). */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc exists yet at the chosen unit: create one now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* Configuration succeeded: finish bringing the unit up. */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the unit we grabbed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3551
3552 void
3553 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3554 size_t xmin, size_t xmax)
3555 {
3556 int error;
3557
3558 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3559 pool_sethiwat(p, xmax);
3560 if ((error = pool_prime(p, xmin)) != 0)
3561 panic("%s: failed to prime pool: %d", __func__, error);
3562 pool_setlowat(p, xmin);
3563 }
3564
3565 /*
3566 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3567 * to see if there is IO pending and if that IO could possibly be done
3568 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3569 * otherwise.
3570 *
3571 */
3572 int
3573 rf_buf_queue_check(RF_Raid_t *raidPtr)
3574 {
3575 struct raid_softc *rs;
3576 struct dk_softc *dksc;
3577
3578 rs = raidPtr->softc;
3579 dksc = &rs->sc_dksc;
3580
3581 if ((rs->sc_flags & RAIDF_INITED) == 0)
3582 return 1;
3583
3584 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3585 /* there is work to do */
3586 return 0;
3587 }
3588 /* default is nothing to do */
3589 return 1;
3590 }
3591
3592 int
3593 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3594 {
3595 uint64_t numsecs;
3596 unsigned secsize;
3597 int error;
3598
3599 error = getdisksize(vp, &numsecs, &secsize);
3600 if (error == 0) {
3601 diskPtr->blockSize = secsize;
3602 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3603 diskPtr->partitionSize = numsecs;
3604 return 0;
3605 }
3606 return error;
3607 }
3608
/*
 * Autoconf match function: raid is a pseudo-device, so it always
 * matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3614
/*
 * Autoconf attach function: intentionally empty; the unit is set up
 * later when a set is actually configured (see raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3619
3620
3621 static int
3622 raid_detach(device_t self, int flags)
3623 {
3624 int error;
3625 struct raid_softc *rs = raidsoftc(self);
3626
3627 if (rs == NULL)
3628 return ENXIO;
3629
3630 if ((error = raidlock(rs)) != 0)
3631 return (error);
3632
3633 error = raid_detach_unlocked(rs);
3634
3635 raidunlock(rs);
3636
3637 /* XXX raid can be referenced here */
3638
3639 if (error)
3640 return error;
3641
3642 /* Free the softc */
3643 raidput(rs);
3644
3645 return 0;
3646 }
3647
3648 static void
3649 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3650 {
3651 struct dk_softc *dksc = &rs->sc_dksc;
3652 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3653
3654 memset(dg, 0, sizeof(*dg));
3655
3656 dg->dg_secperunit = raidPtr->totalSectors;
3657 dg->dg_secsize = raidPtr->bytesPerSector;
3658 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3659 dg->dg_ntracks = 4 * raidPtr->numCol;
3660
3661 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3662 }
3663
3664 /*
3665 * Get cache info for all the components (including spares).
3666 * Returns intersection of all the cache flags of all disks, or first
3667 * error if any encountered.
3668 * XXXfua feature flags can change as spares are added - lock down somehow
3669 */
3670 static int
3671 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3672 {
3673 int c;
3674 int error;
3675 int dkwhole = 0, dkpart;
3676
3677 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3678 /*
3679 * Check any non-dead disk, even when currently being
3680 * reconstructed.
3681 */
3682 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3683 || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3684 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3685 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3686 if (error) {
3687 if (error != ENODEV) {
3688 printf("raid%d: get cache for component %s failed\n",
3689 raidPtr->raidid,
3690 raidPtr->Disks[c].devname);
3691 }
3692
3693 return error;
3694 }
3695
3696 if (c == 0)
3697 dkwhole = dkpart;
3698 else
3699 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3700 }
3701 }
3702
3703 *data = dkwhole;
3704
3705 return 0;
3706 }
3707
3708 /*
3709 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3710 * We end up returning whatever error was returned by the first cache flush
3711 * that fails.
3712 */
3713
3714 int
3715 rf_sync_component_caches(RF_Raid_t *raidPtr)
3716 {
3717 int c, sparecol;
3718 int e,error;
3719 int force = 1;
3720
3721 error = 0;
3722 for (c = 0; c < raidPtr->numCol; c++) {
3723 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3724 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3725 &force, FWRITE, NOCRED);
3726 if (e) {
3727 if (e != ENODEV)
3728 printf("raid%d: cache flush to component %s failed.\n",
3729 raidPtr->raidid, raidPtr->Disks[c].devname);
3730 if (error == 0) {
3731 error = e;
3732 }
3733 }
3734 }
3735 }
3736
3737 for( c = 0; c < raidPtr->numSpare ; c++) {
3738 sparecol = raidPtr->numCol + c;
3739 /* Need to ensure that the reconstruct actually completed! */
3740 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3741 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3742 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3743 if (e) {
3744 if (e != ENODEV)
3745 printf("raid%d: cache flush to component %s failed.\n",
3746 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3747 if (error == 0) {
3748 error = e;
3749 }
3750 }
3751 }
3752 }
3753 return error;
3754 }
3755
3756 /* Fill in info with the current status */
3757 void
3758 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3759 {
3760
3761 if (raidPtr->status != rf_rs_reconstructing) {
3762 info->total = 100;
3763 info->completed = 100;
3764 } else {
3765 info->total = raidPtr->reconControl->numRUsTotal;
3766 info->completed = raidPtr->reconControl->numRUsComplete;
3767 }
3768 info->remaining = info->total - info->completed;
3769 }
3770
3771 /* Fill in info with the current status */
3772 void
3773 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3774 {
3775
3776 if (raidPtr->parity_rewrite_in_progress == 1) {
3777 info->total = raidPtr->Layout.numStripe;
3778 info->completed = raidPtr->parity_rewrite_stripes_done;
3779 } else {
3780 info->completed = 100;
3781 info->total = 100;
3782 }
3783 info->remaining = info->total - info->completed;
3784 }
3785
3786 /* Fill in info with the current status */
3787 void
3788 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3789 {
3790
3791 if (raidPtr->copyback_in_progress == 1) {
3792 info->total = raidPtr->Layout.numStripe;
3793 info->completed = raidPtr->copyback_stripes_done;
3794 info->remaining = info->total - info->completed;
3795 } else {
3796 info->remaining = 0;
3797 info->completed = 100;
3798 info->total = 100;
3799 }
3800 }
3801
3802 /* Fill in config with the current info */
3803 int
3804 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3805 {
3806 int d, i, j;
3807
3808 if (!raidPtr->valid)
3809 return (ENODEV);
3810 config->cols = raidPtr->numCol;
3811 config->ndevs = raidPtr->numCol;
3812 if (config->ndevs >= RF_MAX_DISKS)
3813 return (ENOMEM);
3814 config->nspares = raidPtr->numSpare;
3815 if (config->nspares >= RF_MAX_DISKS)
3816 return (ENOMEM);
3817 config->maxqdepth = raidPtr->maxQueueDepth;
3818 d = 0;
3819 for (j = 0; j < config->cols; j++) {
3820 config->devs[d] = raidPtr->Disks[j];
3821 d++;
3822 }
3823 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3824 config->spares[i] = raidPtr->Disks[j];
3825 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3826 /* XXX: raidctl(8) expects to see this as a used spare */
3827 config->spares[i].status = rf_ds_used_spare;
3828 }
3829 }
3830 return 0;
3831 }
3832
3833 int
3834 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3835 {
3836 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3837 RF_ComponentLabel_t *raid_clabel;
3838 int column = clabel->column;
3839
3840 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3841 return EINVAL;
3842 raid_clabel = raidget_component_label(raidPtr, column);
3843 memcpy(clabel, raid_clabel, sizeof *clabel);
3844
3845 return 0;
3846 }
3847
3848 /*
3849 * Module interface
3850 */
3851
/* Module depends on the dk_subr and bufq_fcfs modules. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* Declare the cfdriver for module loads; builtin kernels already have it. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3861
3862 static int
3863 raid_modcmd(modcmd_t cmd, void *data)
3864 {
3865 int error;
3866
3867 error = 0;
3868 switch (cmd) {
3869 case MODULE_CMD_INIT:
3870 error = raid_modcmd_init();
3871 break;
3872 case MODULE_CMD_FINI:
3873 error = raid_modcmd_fini();
3874 break;
3875 default:
3876 error = ENOTTY;
3877 break;
3878 }
3879 return error;
3880 }
3881
/*
 * One-time module initialization: create the global lock, attach the
 * device switch and autoconf glue, boot the RAIDframe core, and
 * register the finalizer that later performs autoconfiguration.
 * Returns 0 on success or an errno; each failed step rolls back the
 * steps already completed.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the majors; EEXIST is tolerated so
	   a builtin devsw entry doesn't fail the module load. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attachment. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back everything attached so far. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is always 0 here — all failure paths above
	   return early — so this test never skips the boot. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: autoconfiguration simply won't happen. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3952
/*
 * Module teardown: refuse to unload while any raid unit exists, then
 * detach the autoconf glue and device switch, shut down the RAIDframe
 * core, and destroy the global lock.  Each failed step re-attaches
 * whatever was already detached before returning the error.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Undo the cfattach detach above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Undo the cfdriver/cfattach detaches above. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4002