/* $NetBSD: rf_netbsdkintf.c,v 1.376.4.6 2023/10/18 12:11:52 martin Exp $ */

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr$
 *
 *      @(#)cd.c        8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.376.4.6 2023/10/18 12:11:52 martin Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else /* DEBUG */
#define db1_printf(a) { }
#endif /* DEBUG */

#define DEVICE_XNAME(dev) dev ? device_xname(dev) : "null"

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;   /* requests to install a
                                                 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;   /* responses from
                                                 * installation process */
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_strategy = raidstrategy,
        .d_ioctl = raidioctl,
        .d_dump = raiddump,
        .d_psize = raidsize,
        .d_discard = nodiscard,
        .d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_read = raidread,
        .d_write = raidwrite,
        .d_ioctl = raidioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
        .d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
        .d_open = raidopen,
        .d_close = raidclose,
        .d_strategy = raidstrategy,
        .d_diskstart = raid_diskstart,
        .d_dumpblocks = raid_dumpblocks,
        .d_lastclose = raid_lastclose,
        .d_minphys = minphys
};

#define raidunit(x)     DISKUNIT(x)
#define raidsoftc(dev)  (((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
        RF_RowCol_t col;
        RF_ReconReqFlags_t flags;
        void *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING 6
#endif

#define RAIDLABELDEV(dev) \
        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req_internal *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

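/*
 * Allocate and initialize the softc for one RAID unit.
 */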
static struct raid_softc *
raidcreate(int unit) {
        struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
        sc->sc_unit = unit;
        cv_init(&sc->sc_cv, "raidunit");
        mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
        return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
        cv_destroy(&sc->sc_cv);
        mutex_destroy(&sc->sc_mutex);
        kmem_free(sc, sizeof(*sc));
}

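/*
 * Look up the softc for the given unit in the global list of RAID
 * sets; if it is not found and 'create' is set, allocate a new one
 * and insert it into the list.
 */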
static struct raid_softc *
raidget(int unit, bool create) {
        struct raid_softc *sc;
        if (unit < 0) {
#ifdef DIAGNOSTIC
                panic("%s: unit %d!", __func__, unit);
#endif
                return NULL;
        }
        mutex_enter(&raid_lock);
        LIST_FOREACH(sc, &raids, sc_link) {
                if (sc->sc_unit == unit) {
                        mutex_exit(&raid_lock);
                        return sc;
                }
        }
        mutex_exit(&raid_lock);
        if (!create)
                return NULL;
        sc = raidcreate(unit);
        mutex_enter(&raid_lock);
        LIST_INSERT_HEAD(&raids, sc, sc_link);
        mutex_exit(&raid_lock);
        return sc;
}

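/*
 * Remove a softc from the global list and free it.
 */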
static void
raidput(struct raid_softc *sc) {
        mutex_enter(&raid_lock);
        LIST_REMOVE(sc, sc_link);
        mutex_exit(&raid_lock);
        raiddestroy(sc);
}

void
raidattach(int num)
{

        /*
         * Device attachment and associated initialization now occurs
         * as part of the module initialization.
         */
}

int
rf_autoconfig(device_t self)
{
        RF_AutoConfig_t *ac_list;
        RF_ConfigSet_t *config_sets;

        if (!raidautoconfig || raidautoconfigdone == true)
                return (0);

        /* XXX This code can only be run once. */
        raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
        /*
         * 0. find the boot device if needed first so we can use it later
         * this needs to be done before we autoconfigure any raid sets,
         * because if we use wedges we are not going to be able to open
         * the boot device later
         */
        if (booted_device == NULL)
                cpu_bootconf();
#endif
        /* 1. locate all RAID components on the system */
        aprint_debug("Searching for RAID components...\n");
        ac_list = rf_find_raid_components();

        /* 2. Sort them into their respective sets. */
        config_sets = rf_create_auto_sets(ac_list);

        /*
         * 3. Evaluate each set and configure the valid ones.
         * This gets done in rf_buildroothack().
         */
        rf_buildroothack(config_sets);

        return 1;
}

int
rf_inited(const struct raid_softc *rs) {
        return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
        return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
        return rs->sc_unit;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
        const char *bootname;
        size_t len;

        /* if bdv is NULL, the set can't contain it. exit early. */
        if (bdv == NULL)
                return 0;

        bootname = device_xname(bdv);
        len = strlen(bootname);

        for (int col = 0; col < r->numCol; col++) {
                const char *devname = r->Disks[col].devname;
                devname += sizeof("/dev/") - 1;
                if (strncmp(devname, "dk", 2) == 0) {
                        const char *parent =
                            dkwedge_get_parent_name(r->Disks[col].dev);
                        if (parent != NULL)
                                devname = parent;
                }
                if (strncmp(devname, bootname, len) == 0) {
                        struct raid_softc *sc = r->softc;
                        aprint_debug("raid%d includes boot device %s\n",
                            sc->sc_unit, devname);
                        return 1;
                }
        }
        return 0;
}


/*
 * Example setup:
 * dk1 at wd0: "raid@wd0", 171965 blocks at 32802, type: raidframe
 * dk3 at wd1: "raid@wd1", 171965 blocks at 32802, type: raidframe
 * raid1: Components: /dev/dk1 /dev/dk3
 * dk4 at raid1: "empty@raid1", 8192 blocks at 34, type: msdos
 * dk5 at raid1: "root@raid1", 163517 blocks at 8226, type: ffs
 *
 * If booted from wd0, booted_device will be
 * disk wd0, startblk = 41092, nblks = 163517
 *
 * That is, dk5 with startblk computed from the beginning of wd0
 * instead of beginning of raid1:
 * 32802 + 64 (RF_PROTECTED_SECTORS) + 8226 = 41092
 *
 * In order to find the boot wedge, we must iterate on each component,
 * find its offset from disk beginning, and look for the boot wedge with
 * startblk adjusted.
 */
static device_t
rf_find_bootwedge(struct raid_softc *rsc)
{
        RF_Raid_t *r = &rsc->sc_r;
        const char *bootname;
        size_t len;
        device_t rdev = NULL;

        if (booted_device == NULL)
                goto out;

        bootname = device_xname(booted_device);
        len = strlen(bootname);

        aprint_debug("%s: booted_device %s, startblk = %"PRId64", "
            "nblks = %"PRId64"\n", __func__,
            bootname, booted_startblk, booted_nblks);

        for (int col = 0; col < r->numCol; col++) {
                const char *devname = r->Disks[col].devname;
                const char *parent;
                struct disk *dk;
                u_int nwedges;
                struct dkwedge_info *dkwi;
                struct dkwedge_list dkwl;
                size_t dkwi_len;
                int i;

                devname += sizeof("/dev/") - 1;
                if (strncmp(devname, "dk", 2) != 0)
                        continue;

                parent = dkwedge_get_parent_name(r->Disks[col].dev);
                if (parent == NULL) {
                        aprint_debug("%s: cannot find parent for "
                            "component /dev/%s", __func__, devname);
                        continue;
                }

                if (strncmp(parent, bootname, len) != 0)
                        continue;

                aprint_debug("%s: looking up wedge %s in device %s\n",
                    __func__, devname, parent);

                dk = disk_find(parent);
                nwedges = dk->dk_nwedges;
                dkwi_len = sizeof(*dkwi) * nwedges;
                dkwi = RF_Malloc(dkwi_len);

                dkwl.dkwl_buf = dkwi;
                dkwl.dkwl_bufsize = dkwi_len;
                dkwl.dkwl_nwedges = 0;
                dkwl.dkwl_ncopied = 0;

                if (dkwedge_list(dk, &dkwl, curlwp) == 0) {
                        daddr_t startblk;

                        for (i = 0; i < dkwl.dkwl_ncopied; i++) {
                                if (strcmp(dkwi[i].dkw_devname, devname) == 0)
                                        break;
                        }

                        KASSERT(i < dkwl.dkwl_ncopied);

                        aprint_debug("%s: wedge %s, "
                            "startblk = %"PRId64", "
                            "nblks = %"PRId64"\n",
                            __func__,
                            dkwi[i].dkw_devname,
                            dkwi[i].dkw_offset,
                            dkwi[i].dkw_size);

                        startblk = booted_startblk
                            - dkwi[i].dkw_offset
                            - RF_PROTECTED_SECTORS;

                        aprint_debug("%s: looking for wedge in %s, "
                            "startblk = %"PRId64", "
                            "nblks = %"PRId64"\n",
                            __func__,
                            DEVICE_XNAME(rsc->sc_dksc.sc_dev),
                            startblk, booted_nblks);

                        rdev = dkwedge_find_partition(rsc->sc_dksc.sc_dev,
                            startblk, booted_nblks);
                        if (rdev) {
                                aprint_debug("%s: root candidate wedge %s "
                                    "shifted from %s\n", __func__,
                                    device_xname(rdev),
                                    dkwi[i].dkw_devname);
                                goto done;
                        } else {
                                aprint_debug("%s: not found\n", __func__);
                        }
                }

                aprint_debug("%s: nothing found for col %d\n", __func__, col);
done:
                RF_Free(dkwi, dkwi_len);
        }

out:
        if (!rdev)
                aprint_debug("%s: nothing found\n", __func__);

        return rdev;
}

void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
        RF_ConfigSet_t *cset;
        RF_ConfigSet_t *next_cset;
        int num_root;
        struct raid_softc *sc, *rsc;
        struct dk_softc *dksc = NULL;   /* XXX gcc -Os: may be used uninit. */

        sc = rsc = NULL;
        num_root = 0;
        cset = config_sets;
        while (cset != NULL) {
                next_cset = cset->next;
                if (rf_have_enough_components(cset) &&
                    cset->ac->clabel->autoconfigure == 1) {
                        sc = rf_auto_config_set(cset);
                        if (sc != NULL) {
                                aprint_debug("raid%d: configured ok, rootable %d\n",
                                    sc->sc_unit, cset->rootable);
                                if (cset->rootable) {
                                        rsc = sc;
                                        num_root++;
                                }
                        } else {
                                /* The autoconfig didn't work :( */
                                aprint_debug("Autoconfig failed\n");
                                rf_release_all_vps(cset);
                        }
                } else {
                        /* we're not autoconfiguring this set...
                           release the associated resources */
                        rf_release_all_vps(cset);
                }
                /* cleanup */
                rf_cleanup_config_set(cset);
                cset = next_cset;
        }

        /* if the user has specified what the root device should be
           then we don't touch booted_device or boothowto... */

        if (rootspec != NULL) {
                aprint_debug("%s: rootspec %s\n", __func__, rootspec);
                return;
        }

        /* we found something bootable... */
        if (num_root == 1) {
                device_t candidate_root = NULL;
                dksc = &rsc->sc_dksc;

                if (dksc->sc_dkdev.dk_nwedges != 0) {

                        /* Find the wedge we booted from */
                        candidate_root = rf_find_bootwedge(rsc);

                        /* Try first partition */
                        if (candidate_root == NULL) {
                                size_t i = 0;
                                candidate_root = dkwedge_find_by_parent(
                                    device_xname(dksc->sc_dev), &i);
                        }
                        aprint_debug("%s: candidate wedge root %s\n",
                            __func__, DEVICE_XNAME(candidate_root));
                } else {
                        candidate_root = dksc->sc_dev;
                }

                aprint_debug("%s: candidate root = %s, booted_device = %s, "
                    "root_partition = %d, contains_boot=%d\n",
                    __func__, DEVICE_XNAME(candidate_root),
                    DEVICE_XNAME(booted_device), rsc->sc_r.root_partition,
                    rf_containsboot(&rsc->sc_r, booted_device));

                /* XXX the check for booted_device == NULL can probably be
                 * dropped, now that rf_containsboot handles that case.
                 */
                if (booted_device == NULL ||
                    rsc->sc_r.root_partition == 1 ||
                    rf_containsboot(&rsc->sc_r, booted_device)) {
                        booted_device = candidate_root;
                        booted_method = "raidframe/single";
                        booted_partition = 0;   /* XXX assume 'a' */
                        aprint_debug("%s: set booted_device = %s\n", __func__,
                            DEVICE_XNAME(booted_device));
                }
        } else if (num_root > 1) {
                aprint_debug("%s: many roots=%d, %s\n", __func__, num_root,
                    DEVICE_XNAME(booted_device));

                /*
                 * Maybe the MD code can help. If it cannot, then
                 * setroot() will discover that we have no
                 * booted_device and will ask the user if nothing was
                 * hardwired in the kernel config file
                 */
                if (booted_device == NULL)
                        return;

                num_root = 0;
                mutex_enter(&raid_lock);
                LIST_FOREACH(sc, &raids, sc_link) {
                        RF_Raid_t *r = &sc->sc_r;
                        if (r->valid == 0)
                                continue;

                        if (r->root_partition == 0)
                                continue;

                        if (rf_containsboot(r, booted_device)) {
                                num_root++;
                                rsc = sc;
                                dksc = &rsc->sc_dksc;
                        }
                }
                mutex_exit(&raid_lock);

                if (num_root == 1) {
                        booted_device = dksc->sc_dev;
                        booted_method = "raidframe/multi";
                        booted_partition = 0;   /* XXX assume 'a' */
                } else {
                        /* we can't guess.. require the user to answer... */
                        boothowto |= RB_ASKNAME;
                }
        }
}

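/*
 * Return the size of the partition, via the dk(4) framework.
 */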
static int
raidsize(dev_t dev)
{
        struct raid_softc *rs;
        struct dk_softc *dksc;
        unsigned int unit;

        unit = raidunit(dev);
        if ((rs = raidget(unit, false)) == NULL)
                return -1;
        dksc = &rs->sc_dksc;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return -1;

        return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
        unsigned int unit;
        struct raid_softc *rs;
        struct dk_softc *dksc;

        unit = raidunit(dev);
        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;
        dksc = &rs->sc_dksc;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return ENODEV;

        /*
           Note that blkno is relative to this particular partition.
           By adding RF_PROTECTED_SECTORS, we get a value that
           is relative to the partition used for the underlying component.
        */
        blkno += RF_PROTECTED_SECTORS;

        return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
        struct raid_softc *rs = raidsoftc(dev);
        const struct bdevsw *bdev;
        RF_Raid_t *raidPtr;
        int c, sparecol, j, scol, dumpto;
        int error = 0;

        raidPtr = &rs->sc_r;

        /* we only support dumping to RAID 1 sets */
        if (raidPtr->Layout.numDataCol != 1 ||
            raidPtr->Layout.numParityCol != 1)
                return EINVAL;

        if ((error = raidlock(rs)) != 0)
                return error;

        /* figure out what device is alive.. */

        /*
           Look for a component to dump to.  The preference for the
           component to dump to is as follows:
           1) the first component
           2) a used_spare of the first component
           3) the second component
           4) a used_spare of the second component
        */

        dumpto = -1;
        for (c = 0; c < raidPtr->numCol; c++) {
                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                        /* this might be the one */
                        dumpto = c;
                        break;
                }
        }

        /*
           At this point we have possibly selected a live component.
           If we didn't find a live component, we now check to see
           if there is a relevant spared component.
        */

        for (c = 0; c < raidPtr->numSpare; c++) {
                sparecol = raidPtr->numCol + c;
                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                        /* How about this one? */
                        scol = -1;
                        for (j = 0; j < raidPtr->numCol; j++) {
                                if (raidPtr->Disks[j].spareCol == sparecol) {
                                        scol = j;
                                        break;
                                }
                        }
                        if (scol == 0) {
                                /*
                                   We must have found a spared first
                                   component!  We'll take that over
                                   anything else found so far.  (We
                                   couldn't have found a real first
                                   component before, since this is a
                                   used spare, and it's saying that
                                   it's replacing the first
                                   component.)  On reboot (with
                                   autoconfiguration turned on)
                                   sparecol will become the first
                                   component (component0) of this set.
                                */
                                dumpto = sparecol;
                                break;
                        } else if (scol != -1) {
                                /*
                                   Must be a spared second component.
                                   We'll dump to that if we haven't found
                                   anything else so far.
                                */
                                if (dumpto == -1)
                                        dumpto = sparecol;
                        }
                }
        }

        if (dumpto == -1) {
                /* we couldn't find any live components to dump to!?!?
                 */
                error = EINVAL;
                goto out;
        }

        bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
        if (bdev == NULL) {
                error = ENXIO;
                goto out;
        }

        error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
            blkno, va, nblk * raidPtr->bytesPerSector);

out:
        raidunlock(rs);

        return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;
        struct dk_softc *dksc;
        int error = 0;
        int part, pmask;

        if ((rs = raidget(unit, true)) == NULL)
                return ENXIO;
        if ((error = raidlock(rs)) != 0)
                return (error);

        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
                error = EBUSY;
                goto bad;
        }

        dksc = &rs->sc_dksc;

        part = DISKPART(dev);
        pmask = (1 << part);

        if (!DK_BUSY(dksc, pmask) &&
            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                /* First one... mark things as dirty... Note that we *MUST*
                   have done a configure before this.  I DO NOT WANT TO BE
                   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                   THAT THEY BELONG TOGETHER!!!!! */
                /* XXX should check to see if we're only open for reading
                   here... If so, we needn't do this, but then need some
                   other way of keeping track of what's happened.. */

                rf_markalldirty(&rs->sc_r);
        }

        if ((rs->sc_flags & RAIDF_INITED) != 0)
                error = dk_open(dksc, dev, flags, fmt, l);

bad:
        raidunlock(rs);

        return (error);
}

static int
raid_lastclose(device_t self)
{
        struct raid_softc *rs = raidsoftc(self);

        /* Last one... device is not unconfigured yet.
           Device shutdown has taken care of setting the
           clean bits if RAIDF_INITED is not set;
           mark things as clean... */

        rf_update_component_labels(&rs->sc_r,
            RF_FINAL_COMPONENT_UPDATE);

        /* pass to unlocked code */
        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                rs->sc_flags |= RAIDF_DETACH;

        return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;
        struct dk_softc *dksc;
        cfdata_t cf;
        int error = 0, do_detach = 0, do_put = 0;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;
        dksc = &rs->sc_dksc;

        if ((error = raidlock(rs)) != 0)
                return (error);

        if ((rs->sc_flags & RAIDF_INITED) != 0) {
                error = dk_close(dksc, dev, flags, fmt, l);
                if ((rs->sc_flags & RAIDF_DETACH) != 0)
                        do_detach = 1;
        } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                do_put = 1;

        raidunlock(rs);

        if (do_detach) {
                /* free the pseudo device attach bits */
                cf = device_cfdata(dksc->sc_dev);
                error = config_detach(dksc->sc_dev, 0);
                if (error == 0)
                        free(cf, M_RAIDFRAME);
        } else if (do_put) {
                raidput(rs);
        }

        return (error);
}

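/*
 * Signal the RAIDframe engine that there may be more work to do:
 * wake up anyone sleeping on iodone_cv.
 */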
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
        rf_lock_mutex2(raidPtr->iodone_lock);
        rf_signal_cond2(raidPtr->iodone_cv);
        rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
        unsigned int unit;
        struct raid_softc *rs;
        struct dk_softc *dksc;
        RF_Raid_t *raidPtr;

        unit = raidunit(bp->b_dev);
        if ((rs = raidget(unit, false)) == NULL) {
                bp->b_error = ENXIO;
                goto fail;
        }
        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                bp->b_error = ENXIO;
                goto fail;
        }
        dksc = &rs->sc_dksc;
        raidPtr = &rs->sc_r;

        /* Queue IO only */
        if (dk_strategy_defer(dksc, bp))
                goto done;

        /* schedule the IO to happen at the next convenient time */
        raid_wakeup(raidPtr);

done:
        return;

fail:
        bp->b_resid = bp->b_bcount;
        biodone(bp);
}

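/*
 * dk(4) start routine: hand a single buffer to the RAIDframe engine.
 */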
static int
raid_diskstart(device_t dev, struct buf *bp)
{
        struct raid_softc *rs = raidsoftc(dev);
        RF_Raid_t *raidPtr;

        raidPtr = &rs->sc_r;
        if (!raidPtr->valid) {
                db1_printf(("raid is not valid..\n"));
                return ENODEV;
        }

        /* XXX */
        bp->b_resid = 0;

        return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
        struct raid_softc *rs;
        struct dk_softc *dksc;

        rs = raidPtr->softc;
        dksc = &rs->sc_dksc;

        dk_done(dksc, bp);

        rf_lock_mutex2(raidPtr->mutex);
        raidPtr->openings++;
        rf_unlock_mutex2(raidPtr->mutex);

        /* schedule more IO */
        raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return (ENXIO);

        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
        int unit = raidunit(dev);
        struct raid_softc *rs;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return (ENXIO);

        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
        struct dk_softc *dksc = &rs->sc_dksc;
        RF_Raid_t *raidPtr;
        int error;

        raidPtr = &rs->sc_r;

        if (DK_BUSY(dksc, 0) ||
            raidPtr->recon_in_progress != 0 ||
            raidPtr->parity_rewrite_in_progress != 0 ||
            raidPtr->copyback_in_progress != 0)
                return EBUSY;

        if ((rs->sc_flags & RAIDF_INITED) == 0)
                return 0;

        rs->sc_flags &= ~RAIDF_SHUTDOWN;

        if ((error = rf_Shutdown(raidPtr)) != 0)
                return error;

        rs->sc_flags &= ~RAIDF_INITED;

        /* Kill off any queued buffers */
        dk_drain(dksc);
        bufq_free(dksc->sc_bufq);

        /* Detach the disk. */
        dkwedge_delall(&dksc->sc_dkdev);
        disk_detach(&dksc->sc_dkdev);
        disk_destroy(&dksc->sc_dkdev);
        dk_detach(dksc);

        return 0;
}

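/*
 * Return true if the given ioctl requires a configured RAID set
 * and this unit has not been initialized yet.
 */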
static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
        switch (cmd) {
        case RAIDFRAME_ADD_HOT_SPARE:
        case RAIDFRAME_CHECK_COPYBACK_STATUS:
        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
        case RAIDFRAME_CHECK_PARITY:
        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
        case RAIDFRAME_CHECK_RECON_STATUS:
        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
        case RAIDFRAME_COPYBACK:
        case RAIDFRAME_DELETE_COMPONENT:
        case RAIDFRAME_FAIL_DISK:
        case RAIDFRAME_GET_ACCTOTALS:
        case RAIDFRAME_GET_COMPONENT_LABEL:
        case RAIDFRAME_GET_INFO:
        case RAIDFRAME_GET_SIZE:
        case RAIDFRAME_INCORPORATE_HOT_SPARE:
        case RAIDFRAME_INIT_LABELS:
        case RAIDFRAME_KEEP_ACCTOTALS:
        case RAIDFRAME_PARITYMAP_GET_DISABLE:
        case RAIDFRAME_PARITYMAP_SET_DISABLE:
        case RAIDFRAME_PARITYMAP_SET_PARAMS:
        case RAIDFRAME_PARITYMAP_STATUS:
        case RAIDFRAME_REBUILD_IN_PLACE:
        case RAIDFRAME_REMOVE_HOT_SPARE:
        case RAIDFRAME_RESET_ACCTOTALS:
        case RAIDFRAME_REWRITEPARITY:
        case RAIDFRAME_SET_AUTOCONFIG:
        case RAIDFRAME_SET_COMPONENT_LABEL:
        case RAIDFRAME_SET_LAST_UNIT:
        case RAIDFRAME_SET_ROOT:
        case RAIDFRAME_SHUTDOWN:
                return (rs->sc_flags & RAIDF_INITED) == 0;
        }
        return false;
}

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
        struct rf_recon_req_internal *rrint;

        if (raidPtr->Layout.map->faultsTolerated == 0) {
                /* Can't do this on a RAID 0!! */
                return EINVAL;
        }

        if (rr->col < 0 || rr->col >= raidPtr->numCol) {
                /* bad column */
                return EINVAL;
        }

        rf_lock_mutex2(raidPtr->mutex);
        if (raidPtr->status == rf_rs_reconstructing) {
                /* you can't fail a disk while we're reconstructing! */
                /* XXX wrong for RAID6 */
                goto out;
        }
        if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
            (raidPtr->numFailures > 0)) {
                /* some other component has failed.  Let's not make
                   things worse. XXX wrong for RAID6 */
                goto out;
        }
        if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
                /* Can't fail a spared disk! */
                goto out;
        }
        rf_unlock_mutex2(raidPtr->mutex);

        /* make a copy of the recon request so that we don't rely on
         * the user's buffer */
        rrint = RF_Malloc(sizeof(*rrint));
        if (rrint == NULL)
                return (ENOMEM);
        rrint->col = rr->col;
        rrint->flags = rr->flags;
        rrint->raidPtr = raidPtr;

        return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
            rrint, "raid_recon");
out:
        rf_unlock_mutex2(raidPtr->mutex);
        return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
        /* allocate a buffer for the layout-specific data, and copy it in */
        if (k_cfg->layoutSpecificSize == 0)
                return 0;

        if (k_cfg->layoutSpecificSize > 10000) {
                /* sanity check */
                return EINVAL;
        }

        u_char *specific_buf;
        specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
        if (specific_buf == NULL)
                return ENOMEM;

        int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
            k_cfg->layoutSpecificSize);
        if (retcode) {
                RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
                return retcode;
        }

        k_cfg->layoutSpecific = specific_buf;
        return 0;
}

static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
        RF_Config_t *u_cfg = *((RF_Config_t **) data);

        if (rs->sc_r.valid) {
                /* There is a valid RAID set running on this unit! */
                printf("raid%d: Device already configured!\n", rs->sc_unit);
                return EINVAL;
        }

        /* copy-in the configuration information */
        /* data points to a pointer to the configuration structure */
        *k_cfg = RF_Malloc(sizeof(**k_cfg));
        if (*k_cfg == NULL) {
                return ENOMEM;
        }
        int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
        if (retcode == 0)
                return 0;
        RF_Free(*k_cfg, sizeof(RF_Config_t));
        db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
        rs->sc_flags |= RAIDF_SHUTDOWN;
        return retcode;
}

int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
        int retcode, i;
        RF_Raid_t *raidPtr = &rs->sc_r;

        rs->sc_flags &= ~RAIDF_SHUTDOWN;

        if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
                goto out;

        /* should do some kind of sanity check on the configuration.
         * Store the sum of all the bytes in the last byte? */

        /* Force nul-termination on all strings. */
#define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
        for (i = 0; i < RF_MAXCOL; i++) {
                ZERO_FINAL(k_cfg->devnames[0][i]);
        }
        for (i = 0; i < RF_MAXSPARE; i++) {
                ZERO_FINAL(k_cfg->spare_names[i]);
        }
        for (i = 0; i < RF_MAXDBGV; i++) {
                ZERO_FINAL(k_cfg->debugVars[i]);
        }
#undef ZERO_FINAL

        /* Check some basic limits. */
        if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
                retcode = EINVAL;
                goto out;
        }
        if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
                retcode = EINVAL;
                goto out;
        }

        /* configure the system */

        /*
         * Clear the entire RAID descriptor, just to make sure
         * there is no stale data left in the case of a
         * reconfiguration
         */
        memset(raidPtr, 0, sizeof(*raidPtr));
        raidPtr->softc = rs;
        raidPtr->raidid = rs->sc_unit;

        retcode = rf_Configure(raidPtr, k_cfg, NULL);

        if (retcode == 0) {
                /* allow this many simultaneous IO's to
                   this RAID device */
                raidPtr->openings = RAIDOUTSTANDING;

                raidinit(rs);
                raid_wakeup(raidPtr);
                rf_markalldirty(raidPtr);
        }

        /* free the buffers.  No return code here. */
        if (k_cfg->layoutSpecificSize) {
                RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
        }
out:
        RF_Free(k_cfg, sizeof(RF_Config_t));
        if (retcode) {
                /*
                 * If configuration failed, set sc_flags so that we
                 * will detach the device when we close it.
                 */
                rs->sc_flags |= RAIDF_SHUTDOWN;
        }
        return retcode;
}

#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

        /* XXX check the label for valid stuff... */
        /* Note that some things *should not* get modified --
           the user should be re-initing the labels instead of
           trying to patch things.
           */
#ifdef DEBUG
        int raidid = raidPtr->raidid;
        printf("raid%d: Got component label:\n", raidid);
        printf("raid%d: Version: %d\n", raidid, clabel->version);
        printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
        printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
        printf("raid%d: Column: %d\n", raidid, clabel->column);
        printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
        printf("raid%d: Clean: %d\n", raidid, clabel->clean);
        printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif /* DEBUG */
        clabel->row = 0;
        int column = clabel->column;

        if ((column < 0) || (column >= raidPtr->numCol)) {
                return (EINVAL);
        }

        /* XXX this isn't allowed to do anything for now :-) */

        /* XXX and before it is, we need to fill in the rest
           of the fields!?!?!?! */
        memcpy(raidget_component_label(raidPtr, column),
            clabel, sizeof(*clabel));
        raidflush_component_label(raidPtr, column);
        return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
        /*
           we only want the serial number from
           the above.  We get all the rest of the information
           from the config that was used to create this RAID
           set.
           */

        raidPtr->serial_number = clabel->serial_number;

        for (int column = 0; column < raidPtr->numCol; column++) {
                RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
                if (RF_DEAD_DISK(diskPtr->status))
                        continue;
                RF_ComponentLabel_t *ci_label = raidget_component_label(
                    raidPtr, column);
                /* Zeroing this is important. */
                memset(ci_label, 0, sizeof(*ci_label));
                raid_init_component_label(raidPtr, ci_label);
                ci_label->serial_number = raidPtr->serial_number;
                ci_label->row = 0; /* we don't pretend to support more */
                rf_component_label_set_partitionsize(ci_label,
                    diskPtr->partitionSize);
                ci_label->column = column;
                raidflush_component_label(raidPtr, column);
                /* XXXjld what about the spares? */
        }

        return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

        if (raidPtr->Layout.map->faultsTolerated == 0) {
                /* Can't do this on a RAID 0!! */
                return EINVAL;
        }

        if (raidPtr->recon_in_progress == 1) {
                /* a reconstruct is already in progress! */
                return EINVAL;
        }

        RF_SingleComponent_t component;
        memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
        component.row = 0; /* we don't support any more */
        int column = component.column;

        if ((column < 0) || (column >= raidPtr->numCol)) {
                return EINVAL;
        }

        rf_lock_mutex2(raidPtr->mutex);
        if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
            (raidPtr->numFailures > 0)) {
                /* XXX 0 above shouldn't be constant!!! */
                /* some component other than this has failed.
                   Let's not make things worse than they already
                   are... */
                printf("raid%d: Unable to reconstruct to disk at:\n",
                    raidPtr->raidid);
                printf("raid%d: Col: %d Too many failures.\n",
                    raidPtr->raidid, column);
                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
                printf("raid%d: Unable to reconstruct to disk at:\n",
                    raidPtr->raidid);
                printf("raid%d: Col: %d "
                    "Reconstruction already occurring!\n",
                    raidPtr->raidid, column);

                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        if (raidPtr->Disks[column].status == rf_ds_spared) {
                rf_unlock_mutex2(raidPtr->mutex);
                return EINVAL;
        }

        rf_unlock_mutex2(raidPtr->mutex);

        struct rf_recon_req_internal *rrint;
        rrint = RF_Malloc(sizeof(*rrint));
        if (rrint == NULL)
                return ENOMEM;

        rrint->col = column;
        rrint->raidPtr = raidPtr;

        return RF_CREATE_THREAD(raidPtr->recon_thread,
            rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
        /*
         * This makes no sense on a RAID 0, or if we are not reconstructing
         * so tell the user it's done.
         */
        if (raidPtr->Layout.map->faultsTolerated == 0 ||
            raidPtr->status != rf_rs_reconstructing) {
                *data = 100;
                return 0;
        }
        if (raidPtr->reconControl->numRUsTotal == 0) {
                *data = 0;
                return 0;
        }
        *data = (raidPtr->reconControl->numRUsComplete * 100
            / raidPtr->reconControl->numRUsTotal);
        return 0;
}

/*
 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
 * on the component_name[] array.
 */
static void
rf_copy_single_component(RF_SingleComponent_t *component, void *data)
{

        memcpy(component, data, sizeof *component);
        component->component_name[sizeof(component->component_name) - 1] = '\0';
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
        int unit = raidunit(dev);
        int part, pmask;
        struct raid_softc *rs;
        struct dk_softc *dksc;
        RF_Config_t *k_cfg;
        RF_Raid_t *raidPtr;
        RF_AccTotals_t *totals;
        RF_SingleComponent_t component;
        RF_DeviceConfig_t *d_cfg, *ucfgp;
        int retcode = 0;
        int column;
        RF_ComponentLabel_t *clabel;
        int d;

        if ((rs = raidget(unit, false)) == NULL)
                return ENXIO;

        dksc = &rs->sc_dksc;
        raidPtr = &rs->sc_r;

        db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
            (int) DISKPART(dev), (int) unit, cmd));

        /* Must be initialized for these... */
        if (rf_must_be_initialized(rs, cmd))
                return ENXIO;

        switch (cmd) {
                /* configure the system */
        case RAIDFRAME_CONFIGURE:
                if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
                        return retcode;
                return rf_construct(rs, k_cfg);

                /* shutdown the system */
        case RAIDFRAME_SHUTDOWN:

                part = DISKPART(dev);
                pmask = (1 << part);

                if ((retcode = raidlock(rs)) != 0)
                        return retcode;

                if (DK_BUSY(dksc, pmask) ||
                    raidPtr->recon_in_progress != 0 ||
                    raidPtr->parity_rewrite_in_progress != 0 ||
                    raidPtr->copyback_in_progress != 0)
                        retcode = EBUSY;
                else {
                        /* detach and free on close */
                        rs->sc_flags |= RAIDF_SHUTDOWN;
                        retcode = 0;
                }

                raidunlock(rs);

                return retcode;
        case RAIDFRAME_GET_COMPONENT_LABEL:
                return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
        case RAIDFRAME_SET_COMPONENT_LABEL:
                return rf_set_component_label(raidPtr, data);
#endif

        case RAIDFRAME_INIT_LABELS:
                return rf_init_component_label(raidPtr, data);

        case RAIDFRAME_SET_AUTOCONFIG:
                d = rf_set_autoconfig(raidPtr, *(int *) data);
                printf("raid%d: New autoconfig value is: %d\n",
                    raidPtr->raidid, d);
                *(int *) data = d;
                return retcode;

        case RAIDFRAME_SET_ROOT:
                d = rf_set_rootpartition(raidPtr, *(int *) data);
                printf("raid%d: New rootpartition value is: %d\n",
                    raidPtr->raidid, d);
                *(int *) data = d;
                return retcode;

                /* initialize all parity */
        case RAIDFRAME_REWRITEPARITY:

                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* Parity for RAID 0 is trivially correct */
                        raidPtr->parity_good = RF_RAID_CLEAN;
                        return 0;
                }

                if (raidPtr->parity_rewrite_in_progress == 1) {
                        /* Re-write is already in progress! */
                        return EINVAL;
                }

                return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
                    rf_RewriteParityThread, raidPtr, "raid_parity");

        case RAIDFRAME_ADD_HOT_SPARE:
                rf_copy_single_component(&component, data);
                return rf_add_hot_spare(raidPtr, &component);

        case RAIDFRAME_REMOVE_HOT_SPARE:
                return retcode;

        case RAIDFRAME_DELETE_COMPONENT:
                rf_copy_single_component(&component, data);
                return rf_delete_component(raidPtr, &component);

        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                rf_copy_single_component(&component, data);
                return rf_incorporate_hot_spare(raidPtr, &component);

        case RAIDFRAME_REBUILD_IN_PLACE:
                return rf_rebuild_in_place(raidPtr, data);

        case RAIDFRAME_GET_INFO:
                ucfgp = *(RF_DeviceConfig_t **)data;
                d_cfg = RF_Malloc(sizeof(*d_cfg));
                if (d_cfg == NULL)
                        return ENOMEM;
                retcode = rf_get_info(raidPtr, d_cfg);
                if (retcode == 0) {
                        retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
                }
                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                return retcode;

        case RAIDFRAME_CHECK_PARITY:
                *(int *) data = raidPtr->parity_good;
                return 0;

        case RAIDFRAME_PARITYMAP_STATUS:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                rf_paritymap_status(raidPtr->parity_map, data);
                return 0;

        case RAIDFRAME_PARITYMAP_SET_PARAMS:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                if (raidPtr->parity_map == NULL)
                        return ENOENT; /* ??? */
                if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
                        return EINVAL;
                return 0;

        case RAIDFRAME_PARITYMAP_GET_DISABLE:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                *(int *) data = rf_paritymap_get_disable(raidPtr);
                return 0;

        case RAIDFRAME_PARITYMAP_SET_DISABLE:
                if (rf_paritymap_ineligible(raidPtr))
                        return EINVAL;
                rf_paritymap_set_disable(raidPtr, *(int *)data);
                /* XXX should errors be passed up? */
                return 0;

        case RAIDFRAME_RESET_ACCTOTALS:
                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
                return 0;

        case RAIDFRAME_GET_ACCTOTALS:
                totals = (RF_AccTotals_t *) data;
                *totals = raidPtr->acc_totals;
                return 0;

        case RAIDFRAME_KEEP_ACCTOTALS:
                raidPtr->keep_acc_totals = *(int *)data;
                return 0;

        case RAIDFRAME_GET_SIZE:
                *(int *) data = raidPtr->totalSectors;
                return 0;

        case RAIDFRAME_FAIL_DISK:
                return rf_fail_disk(raidPtr, data);

                /* invoke a copyback operation after recon on whatever disk
                 * needs it, if any */
        case RAIDFRAME_COPYBACK:

                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0!! */
                        return EINVAL;
                }

                if (raidPtr->copyback_in_progress == 1) {
                        /* Copyback is already in progress! */
                        return EINVAL;
                }

                return RF_CREATE_THREAD(raidPtr->copyback_thread,
                    rf_CopybackThread, raidPtr, "raid_copyback");

                /* return the percentage completion of reconstruction */
        case RAIDFRAME_CHECK_RECON_STATUS:
                return rf_check_recon_status(raidPtr, data);

        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                rf_check_recon_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0, so tell the
                           user it's done. */
                        *(int *) data = 100;
                        return 0;
                }
                if (raidPtr->parity_rewrite_in_progress == 1) {
                        *(int *) data = 100 *
                            raidPtr->parity_rewrite_stripes_done /
                            raidPtr->Layout.numStripe;
                } else {
                        *(int *) data = 100;
                }
                return 0;

        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                rf_check_parityrewrite_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_CHECK_COPYBACK_STATUS:
                if (raidPtr->Layout.map->faultsTolerated == 0) {
                        /* This makes no sense on a RAID 0 */
                        *(int *) data = 100;
                        return 0;
                }
                if (raidPtr->copyback_in_progress == 1) {
                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                            raidPtr->Layout.numStripe;
                } else {
                        *(int *) data = 100;
                }
                return 0;

        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
                rf_check_copyback_status_ext(raidPtr, data);
                return 0;

        case RAIDFRAME_SET_LAST_UNIT:
                for (column = 0; column < raidPtr->numCol; column++)
                        if (raidPtr->Disks[column].status != rf_ds_optimal)
                                return EBUSY;

                for (column = 0; column < raidPtr->numCol; column++) {
                        clabel = raidget_component_label(raidPtr, column);
                        clabel->last_unit = *(int *)data;
                        raidflush_component_label(raidPtr, column);
                }
                rs->sc_cflags |= RAIDF_UNIT_CHANGED;
                return 0;

                /* the sparetable daemon calls this to wait for the kernel to
                 * need a spare table. this ioctl does not return until a
                 * spare table is needed. XXX -- calling mpsleep here in the
                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                 * -- I should either compute the spare table in the kernel,
                 * or have a different -- XXX XXX -- interface (a different
                 * character device) for delivering the table -- XXX */
#if RF_DISABLED
        case RAIDFRAME_SPARET_WAIT:
                rf_lock_mutex2(rf_sparet_wait_mutex);
                while (!rf_sparet_wait_queue)
                        rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
                RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                rf_unlock_mutex2(rf_sparet_wait_mutex);

                /* structure assignment */
                *((RF_SparetWait_t *) data) = *waitreq;

                RF_Free(waitreq, sizeof(*waitreq));
                return 0;

                /* wakes up a process waiting on SPARET_WAIT and puts an error
                 * code in it that will cause the daemon to exit */
        case RAIDFRAME_ABORT_SPARET_WAIT:
                waitreq = RF_Malloc(sizeof(*waitreq));
                waitreq->fcol = -1;
                rf_lock_mutex2(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_wait_queue;
                rf_sparet_wait_queue = waitreq;
                rf_broadcast_cond2(rf_sparet_wait_cv);
                rf_unlock_mutex2(rf_sparet_wait_mutex);
                return 0;

                /* used by the spare table daemon to deliver a spare table
                 * into the kernel */
        case RAIDFRAME_SEND_SPARET:

                /* install the spare table */
                retcode = rf_SetSpareTable(raidPtr, *(void **) data);

                /* respond to the requestor.  the return status of the spare
                 * table installation is passed in the "fcol" field */
                waitreq = RF_Malloc(sizeof(*waitreq));
                waitreq->fcol = retcode;
                rf_lock_mutex2(rf_sparet_wait_mutex);
                waitreq->next = rf_sparet_resp_queue;
                rf_sparet_resp_queue = waitreq;
                rf_broadcast_cond2(rf_sparet_resp_cv);
                rf_unlock_mutex2(rf_sparet_wait_mutex);

                return retcode;
#endif
        default:
                /*
                 * Don't bother trying to load compat modules
                 * if it is not our ioctl. This is more efficient
                 * and makes rump tests not depend on compat code
                 */
                if (IOCGROUP(cmd) != 'r')
                        break;
#ifdef _LP64
                if ((l->l_proc->p_flag & PK_32) != 0) {
                        module_autoload("compat_netbsd32_raid",
                            MODULE_CLASS_EXEC);
                        MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
                            (rs, cmd, data), enosys(), retcode);
                        if (retcode != EPASSTHROUGH)
                                return retcode;
                }
#endif
                module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
                MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
                    (rs, cmd, data), enosys(), retcode);
                if (retcode != EPASSTHROUGH)
                        return retcode;

                module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
                MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
                    (rs, cmd, data), enosys(), retcode);
                if (retcode != EPASSTHROUGH)
                        return retcode;
                break; /* fall through to the os-specific code below */

        }

        if (!raidPtr->valid)
                return (EINVAL);

        /*
         * Add support for "regular" device ioctls here.
         */

        switch (cmd) {
        case DIOCGCACHE:
                retcode = rf_get_component_caches(raidPtr, (int *)data);
                break;

        case DIOCCACHESYNC:
                retcode = rf_sync_component_caches(raidPtr);
                break;

        default:
                retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
                break;
        }

        return (retcode);
}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */

static void
raidinit(struct raid_softc *rs)
{
        cfdata_t cf;
        unsigned int unit;
        struct dk_softc *dksc = &rs->sc_dksc;
        RF_Raid_t *raidPtr = &rs->sc_r;
        device_t dev;

        unit = raidPtr->raidid;

        /* XXX doesn't check bounds. */
        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

        /* attach the pseudo device */
        cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
        cf->cf_name = raid_cd.cd_name;
        cf->cf_atname = raid_cd.cd_name;
        cf->cf_unit = unit;
        cf->cf_fstate = FSTATE_STAR;

        dev = config_attach_pseudo(cf);
        if (dev == NULL) {
                printf("raid%d: config_attach_pseudo failed\n",
                    raidPtr->raidid);
                free(cf, M_RAIDFRAME);
                return;
        }

        /* provide a backpointer to the real softc */
        raidsoftc(dev) = rs;

        /* disk_attach actually creates space for the CPU disklabel, among
         * other things, so it's critical to call this *BEFORE* we try putzing
         * with disklabels. */
        dk_init(dksc, dev, DKTYPE_RAID);
        disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

        /* XXX There may be a weird interaction here between this, and
         * protectedSectors, as used in RAIDframe.  */

        rs->sc_size = raidPtr->totalSectors;

        /* Attach dk and disk subsystems */
        dk_attach(dksc);
        disk_attach(&dksc->sc_dkdev);
        rf_set_geometry(rs, raidPtr);

        bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
1968 rs->sc_flags |= RAIDF_INITED;
1969
1970 dkwedge_discover(&dksc->sc_dkdev);
1971 }
1972
1973 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1974 /* wake up the daemon & tell it to get us a spare table
1975 * XXX
1976 * the entries in the queues should be tagged with the raidPtr
1977 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1979 * XXX
1980 *
1981 * XXX This code is not currently used. GO
1982 */
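/*
 * A sketch of the spare-table handshake, as implemented here and in the
 * RAIDFRAME_SPARET_WAIT/RAIDFRAME_SEND_SPARET ioctl cases above:
 *
 *	kernel (this function)              user-space daemon
 *	----------------------              -----------------
 *	queue req on rf_sparet_wait_queue,
 *	broadcast rf_sparet_wait_cv    -->  wakes in RAIDFRAME_SPARET_WAIT
 *	sleep on rf_sparet_resp_cv          computes the spare table
 *	                               <--  RAIDFRAME_SEND_SPARET installs
 *	                                    it via rf_SetSpareTable() and
 *	                                    queues a response
 *	dequeue resp; its fcol field
 *	carries the installation status
 */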
1983 int
1984 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1985 {
1986 int retcode;
1987
1988 rf_lock_mutex2(rf_sparet_wait_mutex);
1989 req->next = rf_sparet_wait_queue;
1990 rf_sparet_wait_queue = req;
1991 rf_broadcast_cond2(rf_sparet_wait_cv);
1992
1993 /* mpsleep unlocks the mutex */
1994 while (!rf_sparet_resp_queue) {
1995 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1996 }
1997 req = rf_sparet_resp_queue;
1998 rf_sparet_resp_queue = req->next;
1999 rf_unlock_mutex2(rf_sparet_wait_mutex);
2000
2001 retcode = req->fcol;
2002 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2003 * alloc'd */
2004 return (retcode);
2005 }
2006 #endif
2007
2008 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2009 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O.
 * we do some extra sanity checking to return "appropriate" error values
 * for certain conditions (to make some standard utilities work)
2013 *
2014 * Formerly known as: rf_DoAccessKernel
2015 */
2016 void
2017 raidstart(RF_Raid_t *raidPtr)
2018 {
2019 struct raid_softc *rs;
2020 struct dk_softc *dksc;
2021
2022 rs = raidPtr->softc;
2023 dksc = &rs->sc_dksc;
2024 /* quick check to see if anything has died recently */
2025 rf_lock_mutex2(raidPtr->mutex);
2026 if (raidPtr->numNewFailures > 0) {
2027 rf_unlock_mutex2(raidPtr->mutex);
2028 rf_update_component_labels(raidPtr,
2029 RF_NORMAL_COMPONENT_UPDATE);
2030 rf_lock_mutex2(raidPtr->mutex);
2031 raidPtr->numNewFailures--;
2032 }
2033 rf_unlock_mutex2(raidPtr->mutex);
2034
2035 if ((rs->sc_flags & RAIDF_INITED) == 0) {
2036 printf("raid%d: raidstart not ready\n", raidPtr->raidid);
2037 return;
2038 }
2039
2040 dk_start(dksc, NULL);
2041 }
2042
2043 static int
2044 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
2045 {
2046 RF_SectorCount_t num_blocks, pb, sum;
2047 RF_RaidAddr_t raid_addr;
2048 daddr_t blocknum;
2049 int do_async;
2050 int rc;
2051
2052 rf_lock_mutex2(raidPtr->mutex);
2053 if (raidPtr->openings == 0) {
2054 rf_unlock_mutex2(raidPtr->mutex);
2055 return EAGAIN;
2056 }
2057 rf_unlock_mutex2(raidPtr->mutex);
2058
2059 blocknum = bp->b_rawblkno;
2060
2061 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
2062 (int) blocknum));
2063
2064 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2065 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
2066
2067 /* *THIS* is where we adjust what block we're going to...
2068 * but DO NOT TOUCH bp->b_blkno!!! */
2069 raid_addr = blocknum;
2070
2071 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2072 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2073 sum = raid_addr + num_blocks + pb;
2074 if (1 || rf_debugKernelAccess) {
2075 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2076 (int) raid_addr, (int) sum, (int) num_blocks,
2077 (int) pb, (int) bp->b_resid));
2078 }
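	/*
	 * Reject accesses that fall outside the array.  The RF_* sector
	 * types are unsigned, so besides the straightforward end-past-device
	 * test, comparing "sum" against each of its addends catches
	 * arithmetic wraparound: e.g. a raid_addr near the top of the type's
	 * range plus a large num_blocks yields sum < raid_addr.
	 */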
2079 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2080 || (sum < num_blocks) || (sum < pb)) {
2081 rc = ENOSPC;
2082 goto done;
2083 }
2084 /*
2085 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2086 */
2087
2088 if (bp->b_bcount & raidPtr->sectorMask) {
2089 rc = ENOSPC;
2090 goto done;
2091 }
2092 db1_printf(("Calling DoAccess..\n"));
2093
2094
2095 rf_lock_mutex2(raidPtr->mutex);
2096 raidPtr->openings--;
2097 rf_unlock_mutex2(raidPtr->mutex);
2098
2099 /*
2100 * Everything is async.
2101 */
2102 do_async = 1;
2103
2104 /* don't ever condition on bp->b_flags & B_WRITE.
2105 * always condition on B_READ instead */
2106
2107 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2108 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2109 do_async, raid_addr, num_blocks,
2110 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2111
2112 done:
2113 return rc;
2114 }
2115
2116 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2117
2118 int
2119 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2120 {
2121 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2122 struct buf *bp;
2123
2124 req->queue = queue;
2125 bp = req->bp;
2126
2127 switch (req->type) {
2128 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2129 /* XXX need to do something extra here.. */
2130 /* I'm leaving this in, as I've never actually seen it used,
2131 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
2133 queue->numOutstanding++;
2134
2135 bp->b_flags = 0;
2136 bp->b_private = req;
2137
2138 KernelWakeupFunc(bp);
2139 break;
2140
2141 case RF_IO_TYPE_READ:
2142 case RF_IO_TYPE_WRITE:
2143 #if RF_ACC_TRACE > 0
2144 if (req->tracerec) {
2145 RF_ETIMER_START(req->tracerec->timer);
2146 }
2147 #endif
2148 InitBP(bp, queue->rf_cinfo->ci_vp,
2149 op, queue->rf_cinfo->ci_dev,
2150 req->sectorOffset, req->numSector,
2151 req->buf, KernelWakeupFunc, (void *) req,
2152 queue->raidPtr->logBytesPerSector, req->b_proc);
2153
2154 if (rf_debugKernelAccess) {
2155 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2156 (long) bp->b_blkno));
2157 }
2158 queue->numOutstanding++;
2159 queue->last_deq_sector = req->sectorOffset;
2160 /* acc wouldn't have been let in if there were any pending
2161 * reqs at any other priority */
2162 queue->curPriority = req->priority;
2163
2164 db1_printf(("Going for %c to unit %d col %d\n",
2165 req->type, queue->raidPtr->raidid,
2166 queue->col));
2167 db1_printf(("sector %d count %d (%d bytes) %d\n",
2168 (int) req->sectorOffset, (int) req->numSector,
2169 (int) (req->numSector <<
2170 queue->raidPtr->logBytesPerSector),
2171 (int) queue->raidPtr->logBytesPerSector));
2172
2173 /*
2174 * XXX: drop lock here since this can block at
2175 * least with backing SCSI devices. Retake it
2176 * to minimize fuss with calling interfaces.
2177 */
2178
2179 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2180 bdev_strategy(bp);
2181 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2182 break;
2183
2184 default:
2185 panic("bad req->type in rf_DispatchKernelIO");
2186 }
2187 db1_printf(("Exiting from DispatchKernelIO\n"));
2188
2189 return (0);
2190 }
/* this is the callback function associated with an I/O invoked from
   kernel code. */
2194 static void
2195 KernelWakeupFunc(struct buf *bp)
2196 {
2197 RF_DiskQueueData_t *req = NULL;
2198 RF_DiskQueue_t *queue;
2199
2200 db1_printf(("recovering the request queue:\n"));
2201
2202 req = bp->b_private;
2203
2204 queue = (RF_DiskQueue_t *) req->queue;
2205
2206 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2207
2208 #if RF_ACC_TRACE > 0
2209 if (req->tracerec) {
2210 RF_ETIMER_STOP(req->tracerec->timer);
2211 RF_ETIMER_EVAL(req->tracerec->timer);
2212 rf_lock_mutex2(rf_tracing_mutex);
2213 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2214 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2215 req->tracerec->num_phys_ios++;
2216 rf_unlock_mutex2(rf_tracing_mutex);
2217 }
2218 #endif
2219
2220 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2221 * ballistic, and mark the component as hosed... */
2222
2223 if (bp->b_error != 0) {
2224 /* Mark the disk as dead */
2225 /* but only mark it once... */
2226 /* and only if it wouldn't leave this RAID set
2227 completely broken */
2228 if (((queue->raidPtr->Disks[queue->col].status ==
2229 rf_ds_optimal) ||
2230 (queue->raidPtr->Disks[queue->col].status ==
2231 rf_ds_used_spare)) &&
2232 (queue->raidPtr->numFailures <
2233 queue->raidPtr->Layout.map->faultsTolerated)) {
2234 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2235 queue->raidPtr->raidid,
2236 bp->b_error,
2237 queue->raidPtr->Disks[queue->col].devname);
2238 queue->raidPtr->Disks[queue->col].status =
2239 rf_ds_failed;
2240 queue->raidPtr->status = rf_rs_degraded;
2241 queue->raidPtr->numFailures++;
2242 queue->raidPtr->numNewFailures++;
2243 } else { /* Disk is already dead... */
2244 /* printf("Disk already marked as dead!\n"); */
2245 }
2246
2247 }
2248
2249 /* Fill in the error value */
2250 req->error = bp->b_error;
2251
2252 /* Drop this one on the "finished" queue... */
2253 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2254
2255 /* Let the raidio thread know there is work to be done. */
2256 rf_signal_cond2(queue->raidPtr->iodone_cv);
2257
2258 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2259 }
2260
2261
2262 /*
2263 * initialize a buf structure for doing an I/O in the kernel.
2264 */
2265 static void
2266 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2267 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2268 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2269 struct proc *b_proc)
2270 {
2271 /* bp->b_flags = B_PHYS | rw_flag; */
2272 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2273 bp->b_oflags = 0;
2274 bp->b_cflags = 0;
2275 bp->b_bcount = numSect << logBytesPerSector;
2276 bp->b_bufsize = bp->b_bcount;
2277 bp->b_error = 0;
2278 bp->b_dev = dev;
2279 bp->b_data = bf;
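	/*
	 * b_blkno is in DEV_BSIZE units: scale the sector number up to a
	 * byte offset, then down to DEV_BSIZE blocks.  With 512-byte
	 * sectors (logBytesPerSector == DEV_BSHIFT == 9) this is an
	 * identity; with 4096-byte sectors it multiplies startSect by 8.
	 */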
2280 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2281 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2282 if (bp->b_bcount == 0) {
2283 panic("bp->b_bcount is zero in InitBP!!");
2284 }
2285 bp->b_proc = b_proc;
2286 bp->b_iodone = cbFunc;
2287 bp->b_private = cbArg;
2288 }
2289
2290 /*
2291 * Wait interruptibly for an exclusive lock.
2292 *
2293 * XXX
2294 * Several drivers do this; it should be abstracted and made MP-safe.
2295 * (Hmm... where have we seen this warning before :-> GO )
2296 */
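/*
 * Typical use, as in raid_detach() below:
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return error;
 *	... exclusive access to the softc ...
 *	raidunlock(rs);
 *
 * Since the wait uses cv_wait_sig(), a pending signal can abort it and
 * the resulting error is handed back to the caller.
 */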
2297 static int
2298 raidlock(struct raid_softc *rs)
2299 {
2300 int error;
2301
2302 error = 0;
2303 mutex_enter(&rs->sc_mutex);
2304 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2305 rs->sc_flags |= RAIDF_WANTED;
2306 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2307 if (error != 0)
2308 goto done;
2309 }
2310 rs->sc_flags |= RAIDF_LOCKED;
2311 done:
2312 mutex_exit(&rs->sc_mutex);
2313 return (error);
2314 }
2315 /*
2316 * Unlock and wake up any waiters.
2317 */
2318 static void
2319 raidunlock(struct raid_softc *rs)
2320 {
2321
2322 mutex_enter(&rs->sc_mutex);
2323 rs->sc_flags &= ~RAIDF_LOCKED;
2324 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2325 rs->sc_flags &= ~RAIDF_WANTED;
2326 cv_broadcast(&rs->sc_cv);
2327 }
2328 mutex_exit(&rs->sc_mutex);
2329 }
2330
2331
2332 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2333 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2334 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
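/*
 * The helpers below yield the following per-component metadata layout
 * (byte offsets; "secsize" is the component's sector size):
 *
 *	0 .. 16383                        reserved, not touched here
 *	16384                             component label, occupying
 *	                                  max(secsize, 1024) bytes
 *	16384 + max(secsize, 1024)        parity map, occupying
 *	                                  max(secsize, RF_PARITYMAP_NBYTE)
 *
 * Callers should always go through these helpers rather than hard-code
 * the offsets.
 */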
2335
2336 static daddr_t
2337 rf_component_info_offset(void)
2338 {
2339
2340 return RF_COMPONENT_INFO_OFFSET;
2341 }
2342
2343 static daddr_t
2344 rf_component_info_size(unsigned secsize)
2345 {
2346 daddr_t info_size;
2347
2348 KASSERT(secsize);
2349 if (secsize > RF_COMPONENT_INFO_SIZE)
2350 info_size = secsize;
2351 else
2352 info_size = RF_COMPONENT_INFO_SIZE;
2353
2354 return info_size;
2355 }
2356
2357 static daddr_t
2358 rf_parity_map_offset(RF_Raid_t *raidPtr)
2359 {
2360 daddr_t map_offset;
2361
2362 KASSERT(raidPtr->bytesPerSector);
2363 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2364 map_offset = raidPtr->bytesPerSector;
2365 else
2366 map_offset = RF_COMPONENT_INFO_SIZE;
2367 map_offset += rf_component_info_offset();
2368
2369 return map_offset;
2370 }
2371
2372 static daddr_t
2373 rf_parity_map_size(RF_Raid_t *raidPtr)
2374 {
2375 daddr_t map_size;
2376
2377 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2378 map_size = raidPtr->bytesPerSector;
2379 else
2380 map_size = RF_PARITY_MAP_SIZE;
2381
2382 return map_size;
2383 }
2384
2385 int
2386 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2387 {
2388 RF_ComponentLabel_t *clabel;
2389
2390 clabel = raidget_component_label(raidPtr, col);
2391 clabel->clean = RF_RAID_CLEAN;
2392 raidflush_component_label(raidPtr, col);
2393 return(0);
2394 }
2395
2396
2397 int
2398 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2399 {
2400 RF_ComponentLabel_t *clabel;
2401
2402 clabel = raidget_component_label(raidPtr, col);
2403 clabel->clean = RF_RAID_DIRTY;
2404 raidflush_component_label(raidPtr, col);
2405 return(0);
2406 }
2407
2408 int
2409 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2410 {
2411 KASSERT(raidPtr->bytesPerSector);
2412 return raidread_component_label(raidPtr->bytesPerSector,
2413 raidPtr->Disks[col].dev,
2414 raidPtr->raid_cinfo[col].ci_vp,
2415 &raidPtr->raid_cinfo[col].ci_label);
2416 }
2417
2418 RF_ComponentLabel_t *
2419 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2420 {
2421 return &raidPtr->raid_cinfo[col].ci_label;
2422 }
2423
2424 int
2425 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2426 {
2427 RF_ComponentLabel_t *label;
2428
2429 label = &raidPtr->raid_cinfo[col].ci_label;
2430 label->mod_counter = raidPtr->mod_counter;
2431 #ifndef RF_NO_PARITY_MAP
2432 label->parity_map_modcount = label->mod_counter;
2433 #endif
2434 return raidwrite_component_label(raidPtr->bytesPerSector,
2435 raidPtr->Disks[col].dev,
2436 raidPtr->raid_cinfo[col].ci_vp, label);
2437 }
2438
2439
2440 static int
2441 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2442 RF_ComponentLabel_t *clabel)
2443 {
2444 return raidread_component_area(dev, b_vp, clabel,
2445 sizeof(RF_ComponentLabel_t),
2446 rf_component_info_offset(),
2447 rf_component_info_size(secsize));
2448 }
2449
2450 /* ARGSUSED */
2451 static int
2452 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2453 size_t msize, daddr_t offset, daddr_t dsize)
2454 {
2455 struct buf *bp;
2456 int error;
2457
2458 /* XXX should probably ensure that we don't try to do this if
2459 someone has changed rf_protected_sectors. */
2460
2461 if (b_vp == NULL) {
2462 /* For whatever reason, this component is not valid.
2463 Don't try to read a component label from it. */
2464 return(EINVAL);
2465 }
2466
2467 /* get a block of the appropriate size... */
2468 bp = geteblk((int)dsize);
2469 bp->b_dev = dev;
2470
2471 /* get our ducks in a row for the read */
2472 bp->b_blkno = offset / DEV_BSIZE;
2473 bp->b_bcount = dsize;
2474 bp->b_flags |= B_READ;
2475 bp->b_resid = dsize;
2476
2477 bdev_strategy(bp);
2478 error = biowait(bp);
2479
2480 if (!error) {
2481 memcpy(data, bp->b_data, msize);
2482 }
2483
2484 brelse(bp, 0);
2485 return(error);
2486 }
2487
2488
2489 static int
2490 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2491 RF_ComponentLabel_t *clabel)
2492 {
2493 return raidwrite_component_area(dev, b_vp, clabel,
2494 sizeof(RF_ComponentLabel_t),
2495 rf_component_info_offset(),
2496 rf_component_info_size(secsize), 0);
2497 }
2498
2499 /* ARGSUSED */
2500 static int
2501 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2502 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2503 {
2504 struct buf *bp;
2505 int error;
2506
2507 /* get a block of the appropriate size... */
2508 bp = geteblk((int)dsize);
2509 bp->b_dev = dev;
2510
2511 /* get our ducks in a row for the write */
2512 bp->b_blkno = offset / DEV_BSIZE;
2513 bp->b_bcount = dsize;
2514 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2515 bp->b_resid = dsize;
2516
2517 memset(bp->b_data, 0, dsize);
2518 memcpy(bp->b_data, data, msize);
2519
2520 bdev_strategy(bp);
2521 if (asyncp)
2522 return 0;
2523 error = biowait(bp);
2524 brelse(bp, 0);
2525 if (error) {
2526 #if 1
2527 printf("Failed to write RAID component info!\n");
2528 #endif
2529 }
2530
2531 return(error);
2532 }
2533
2534 void
2535 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2536 {
2537 int c;
2538
2539 for (c = 0; c < raidPtr->numCol; c++) {
2540 /* Skip dead disks. */
2541 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2542 continue;
2543 /* XXXjld: what if an error occurs here? */
2544 raidwrite_component_area(raidPtr->Disks[c].dev,
2545 raidPtr->raid_cinfo[c].ci_vp, map,
2546 RF_PARITYMAP_NBYTE,
2547 rf_parity_map_offset(raidPtr),
2548 rf_parity_map_size(raidPtr), 0);
2549 }
2550 }
2551
2552 void
2553 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2554 {
2555 struct rf_paritymap_ondisk tmp;
	int c, first;

	first = 1;
2559 for (c = 0; c < raidPtr->numCol; c++) {
2560 /* Skip dead disks. */
2561 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2562 continue;
2563 raidread_component_area(raidPtr->Disks[c].dev,
2564 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2565 RF_PARITYMAP_NBYTE,
2566 rf_parity_map_offset(raidPtr),
2567 rf_parity_map_size(raidPtr));
2568 if (first) {
2569 memcpy(map, &tmp, sizeof(*map));
2570 first = 0;
2571 } else {
2572 rf_paritymap_merge(map, &tmp);
2573 }
2574 }
2575 }
2576
2577 void
2578 rf_markalldirty(RF_Raid_t *raidPtr)
2579 {
2580 RF_ComponentLabel_t *clabel;
2581 int sparecol;
2582 int c;
2583 int j;
2584 int scol = -1;
2585
2586 raidPtr->mod_counter++;
2587 for (c = 0; c < raidPtr->numCol; c++) {
2588 /* we don't want to touch (at all) a disk that has
2589 failed */
2590 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2591 clabel = raidget_component_label(raidPtr, c);
2592 if (clabel->status == rf_ds_spared) {
2593 /* XXX do something special...
2594 but whatever you do, don't
2595 try to access it!! */
2596 } else {
2597 raidmarkdirty(raidPtr, c);
2598 }
2599 }
2600 }
2601
	for (c = 0; c < raidPtr->numSpare; c++) {
2603 sparecol = raidPtr->numCol + c;
2604 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */
2613
			for (j = 0; j < raidPtr->numCol; j++) {
2615 if (raidPtr->Disks[j].spareCol == sparecol) {
2616 scol = j;
2617 break;
2618 }
2619 }
2620
2621 clabel = raidget_component_label(raidPtr, sparecol);
2622 /* make sure status is noted */
2623
2624 raid_init_component_label(raidPtr, clabel);
2625
2626 clabel->row = 0;
2627 clabel->column = scol;
2628 /* Note: we *don't* change status from rf_ds_used_spare
2629 to rf_ds_optimal */
2630 /* clabel.status = rf_ds_optimal; */
2631
2632 raidmarkdirty(raidPtr, sparecol);
2633 }
2634 }
2635 }
2636
2637
2638 void
2639 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2640 {
2641 RF_ComponentLabel_t *clabel;
2642 int sparecol;
2643 int c;
2644 int j;
2645 int scol;
2646 struct raid_softc *rs = raidPtr->softc;
2647
2648 scol = -1;
2649
2650 /* XXX should do extra checks to make sure things really are clean,
2651 rather than blindly setting the clean bit... */
2652
2653 raidPtr->mod_counter++;
2654
2655 for (c = 0; c < raidPtr->numCol; c++) {
2656 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2657 clabel = raidget_component_label(raidPtr, c);
2658 /* make sure status is noted */
2659 clabel->status = rf_ds_optimal;
2660
2661 /* note what unit we are configured as */
2662 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2663 clabel->last_unit = raidPtr->raidid;
2664
2665 raidflush_component_label(raidPtr, c);
2666 if (final == RF_FINAL_COMPONENT_UPDATE) {
2667 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2668 raidmarkclean(raidPtr, c);
2669 }
2670 }
2671 }
2672 /* else we don't touch it.. */
2673 }
2674
	for (c = 0; c < raidPtr->numSpare; c++) {
2676 sparecol = raidPtr->numCol + c;
2677 /* Need to ensure that the reconstruct actually completed! */
2678 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */
2687
			for (j = 0; j < raidPtr->numCol; j++) {
2689 if (raidPtr->Disks[j].spareCol == sparecol) {
2690 scol = j;
2691 break;
2692 }
2693 }
2694
2695 /* XXX shouldn't *really* need this... */
2696 clabel = raidget_component_label(raidPtr, sparecol);
2697 /* make sure status is noted */
2698
2699 raid_init_component_label(raidPtr, clabel);
2700
2701 clabel->column = scol;
2702 clabel->status = rf_ds_optimal;
2703 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2704 clabel->last_unit = raidPtr->raidid;
2705
2706 raidflush_component_label(raidPtr, sparecol);
2707 if (final == RF_FINAL_COMPONENT_UPDATE) {
2708 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2709 raidmarkclean(raidPtr, sparecol);
2710 }
2711 }
2712 }
2713 }
2714 }
2715
2716 void
2717 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2718 {
2719
2720 if (vp != NULL) {
2721 if (auto_configured == 1) {
2722 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2723 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2724 vput(vp);
2725
2726 } else {
2727 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2728 }
2729 }
2730 }
2731
2732
2733 void
2734 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2735 {
2736 int r,c;
2737 struct vnode *vp;
2738 int acd;
2739
2740
2741 /* We take this opportunity to close the vnodes like we should.. */
2742
2743 for (c = 0; c < raidPtr->numCol; c++) {
2744 vp = raidPtr->raid_cinfo[c].ci_vp;
2745 acd = raidPtr->Disks[c].auto_configured;
2746 rf_close_component(raidPtr, vp, acd);
2747 raidPtr->raid_cinfo[c].ci_vp = NULL;
2748 raidPtr->Disks[c].auto_configured = 0;
2749 }
2750
2751 for (r = 0; r < raidPtr->numSpare; r++) {
2752 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2753 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2754 rf_close_component(raidPtr, vp, acd);
2755 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2756 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2757 }
2758 }
2759
2760
2761 void
2762 rf_ReconThread(struct rf_recon_req_internal *req)
2763 {
2764 int s;
2765 RF_Raid_t *raidPtr;
2766
2767 s = splbio();
2768 raidPtr = (RF_Raid_t *) req->raidPtr;
2769 raidPtr->recon_in_progress = 1;
2770
2771 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2772 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2773
2774 RF_Free(req, sizeof(*req));
2775
2776 raidPtr->recon_in_progress = 0;
2777 splx(s);
2778
2779 /* That's all... */
2780 kthread_exit(0); /* does not return */
2781 }
2782
2783 void
2784 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2785 {
2786 int retcode;
2787 int s;
2788
2789 raidPtr->parity_rewrite_stripes_done = 0;
2790 raidPtr->parity_rewrite_in_progress = 1;
2791 s = splbio();
2792 retcode = rf_RewriteParity(raidPtr);
2793 splx(s);
2794 if (retcode) {
2795 printf("raid%d: Error re-writing parity (%d)!\n",
2796 raidPtr->raidid, retcode);
2797 } else {
2798 /* set the clean bit! If we shutdown correctly,
2799 the clean bit on each component label will get
2800 set */
2801 raidPtr->parity_good = RF_RAID_CLEAN;
2802 }
2803 raidPtr->parity_rewrite_in_progress = 0;
2804
2805 /* Anyone waiting for us to stop? If so, inform them... */
2806 if (raidPtr->waitShutdown) {
2807 rf_lock_mutex2(raidPtr->rad_lock);
2808 cv_broadcast(&raidPtr->parity_rewrite_cv);
2809 rf_unlock_mutex2(raidPtr->rad_lock);
2810 }
2811
2812 /* That's all... */
2813 kthread_exit(0); /* does not return */
2814 }
2815
2816
2817 void
2818 rf_CopybackThread(RF_Raid_t *raidPtr)
2819 {
2820 int s;
2821
2822 raidPtr->copyback_in_progress = 1;
2823 s = splbio();
2824 rf_CopybackReconstructedData(raidPtr);
2825 splx(s);
2826 raidPtr->copyback_in_progress = 0;
2827
2828 /* That's all... */
2829 kthread_exit(0); /* does not return */
2830 }
2831
2832
2833 void
2834 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2835 {
2836 int s;
2837 RF_Raid_t *raidPtr;
2838
2839 s = splbio();
2840 raidPtr = req->raidPtr;
2841 raidPtr->recon_in_progress = 1;
2842 rf_ReconstructInPlace(raidPtr, req->col);
2843 RF_Free(req, sizeof(*req));
2844 raidPtr->recon_in_progress = 0;
2845 splx(s);
2846
2847 /* That's all... */
2848 kthread_exit(0); /* does not return */
2849 }
2850
2851 static RF_AutoConfig_t *
2852 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2853 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2854 unsigned secsize)
2855 {
2856 int good_one = 0;
2857 RF_ComponentLabel_t *clabel;
2858 RF_AutoConfig_t *ac;
2859
2860 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
2861
2862 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2863 /* Got the label. Does it look reasonable? */
2864 if (rf_reasonable_label(clabel, numsecs) &&
2865 (rf_component_label_partitionsize(clabel) <= size)) {
2866 #ifdef DEBUG
2867 printf("Component on: %s: %llu\n",
2868 cname, (unsigned long long)size);
2869 rf_print_component_label(clabel);
2870 #endif
2871 /* if it's reasonable, add it, else ignore it. */
2872 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2873 M_WAITOK);
2874 strlcpy(ac->devname, cname, sizeof(ac->devname));
2875 ac->dev = dev;
2876 ac->vp = vp;
2877 ac->clabel = clabel;
2878 ac->next = ac_list;
2879 ac_list = ac;
2880 good_one = 1;
2881 }
2882 }
2883 if (!good_one) {
2884 /* cleanup */
2885 free(clabel, M_RAIDFRAME);
2886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2887 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2888 vput(vp);
2889 }
2890 return ac_list;
2891 }
2892
2893 RF_AutoConfig_t *
2894 rf_find_raid_components(void)
2895 {
2896 struct vnode *vp;
2897 struct disklabel label;
2898 device_t dv;
2899 deviter_t di;
2900 dev_t dev;
2901 int bmajor, bminor, wedge, rf_part_found;
2902 int error;
2903 int i;
2904 RF_AutoConfig_t *ac_list;
2905 uint64_t numsecs;
2906 unsigned secsize;
2907 int dowedges;
2908
2909 /* initialize the AutoConfig list */
2910 ac_list = NULL;
2911
2912 /*
2913 * we begin by trolling through *all* the devices on the system *twice*
2914 * first we scan for wedges, second for other devices. This avoids
2915 * using a raw partition instead of a wedge that covers the whole disk
2916 */
2917
2918 for (dowedges=1; dowedges>=0; --dowedges) {
2919 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2920 dv = deviter_next(&di)) {
2921
2922 /* we are only interested in disks... */
2923 if (device_class(dv) != DV_DISK)
2924 continue;
2925
2926 /* we don't care about floppies... */
2927 if (device_is_a(dv, "fd")) {
2928 continue;
2929 }
2930
			/* we don't care about CDs... */
2932 if (device_is_a(dv, "cd")) {
2933 continue;
2934 }
2935
			/* we don't care about md devices... */
2937 if (device_is_a(dv, "md")) {
2938 continue;
2939 }
2940
2941 /* hdfd is the Atari/Hades floppy driver */
2942 if (device_is_a(dv, "hdfd")) {
2943 continue;
2944 }
2945
2946 /* fdisa is the Atari/Milan floppy driver */
2947 if (device_is_a(dv, "fdisa")) {
2948 continue;
2949 }
2950
2951 /* are we in the wedges pass ? */
2952 wedge = device_is_a(dv, "dk");
2953 if (wedge != dowedges) {
2954 continue;
2955 }
2956
2957 /* need to find the device_name_to_block_device_major stuff */
2958 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2959
			rf_part_found = 0; /* No raid partition as yet */
2961
2962 /* get a vnode for the raw partition of this disk */
2963 bminor = minor(device_unit(dv));
2964 dev = wedge ? makedev(bmajor, bminor) :
2965 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2966 if (bdevvp(dev, &vp))
2967 panic("RAID can't alloc vnode");
2968
2969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2970 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2971
2972 if (error) {
2973 /* "Who cares." Continue looking
2974 for something that exists*/
2975 vput(vp);
2976 continue;
2977 }
2978
2979 error = getdisksize(vp, &numsecs, &secsize);
2980 if (error) {
2981 /*
2982 * Pseudo devices like vnd and cgd can be
2983 * opened but may still need some configuration.
2984 * Ignore these quietly.
2985 */
2986 if (error != ENXIO)
2987 printf("RAIDframe: can't get disk size"
2988 " for dev %s (%d)\n",
2989 device_xname(dv), error);
2990 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2991 vput(vp);
2992 continue;
2993 }
2994 if (wedge) {
2995 struct dkwedge_info dkw;
2996 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2997 NOCRED);
2998 if (error) {
2999 printf("RAIDframe: can't get wedge info for "
3000 "dev %s (%d)\n", device_xname(dv), error);
3001 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3002 vput(vp);
3003 continue;
3004 }
3005
3006 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3007 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3008 vput(vp);
3009 continue;
3010 }
3011
3012 VOP_UNLOCK(vp);
3013 ac_list = rf_get_component(ac_list, dev, vp,
3014 device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /* There is a raid component on this disk */
3016 continue;
3017 }
3018
3019 /* Ok, the disk exists. Go get the disklabel. */
3020 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3021 if (error) {
3022 /*
3023 * XXX can't happen - open() would
3024 * have errored out (or faked up one)
3025 */
3026 if (error != ENOTTY)
3027 printf("RAIDframe: can't get label for dev "
3028 "%s (%d)\n", device_xname(dv), error);
3029 }
3030
3031 /* don't need this any more. We'll allocate it again
3032 a little later if we really do... */
3033 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3034 vput(vp);
3035
3036 if (error)
3037 continue;
3038
			rf_part_found = 0; /* No raid partitions yet */
3040 for (i = 0; i < label.d_npartitions; i++) {
3041 char cname[sizeof(ac_list->devname)];
3042
3043 /* We only support partitions marked as RAID */
3044 if (label.d_partitions[i].p_fstype != FS_RAID)
3045 continue;
3046
3047 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3048 if (bdevvp(dev, &vp))
3049 panic("RAID can't alloc vnode");
3050
3051 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3052 error = VOP_OPEN(vp, FREAD, NOCRED);
3053 if (error) {
3054 /* Whatever... */
3055 vput(vp);
3056 continue;
3057 }
3058 VOP_UNLOCK(vp);
3059 snprintf(cname, sizeof(cname), "%s%c",
3060 device_xname(dv), 'a' + i);
3061 ac_list = rf_get_component(ac_list, dev, vp, cname,
3062 label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /* There is at least one raid partition on this disk */
3064 }
3065
			/*
			 * If there is no raid component on this disk, either in a
			 * disklabel or inside a wedge, check the raw partition as
			 * well, as it is possible to configure raid components on
			 * raw disk devices.
			 */
3072
3073 if (!rf_part_found) {
3074 char cname[sizeof(ac_list->devname)];
3075
3076 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3077 if (bdevvp(dev, &vp))
3078 panic("RAID can't alloc vnode");
3079
3080 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3081
3082 error = VOP_OPEN(vp, FREAD, NOCRED);
3083 if (error) {
3084 /* Whatever... */
3085 vput(vp);
3086 continue;
3087 }
3088 VOP_UNLOCK(vp);
3089 snprintf(cname, sizeof(cname), "%s%c",
3090 device_xname(dv), 'a' + RAW_PART);
3091 ac_list = rf_get_component(ac_list, dev, vp, cname,
3092 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3093 }
3094 }
3095 deviter_release(&di);
3096 }
3097 return ac_list;
3098 }
3099
3100
3101 int
3102 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3103 {
3104
3105 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3106 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3107 ((clabel->clean == RF_RAID_CLEAN) ||
3108 (clabel->clean == RF_RAID_DIRTY)) &&
3109 clabel->row >=0 &&
3110 clabel->column >= 0 &&
3111 clabel->num_rows > 0 &&
3112 clabel->num_columns > 0 &&
3113 clabel->row < clabel->num_rows &&
3114 clabel->column < clabel->num_columns &&
3115 clabel->blockSize > 0 &&
3116 /*
3117 * numBlocksHi may contain garbage, but it is ok since
3118 * the type is unsigned. If it is really garbage,
3119 * rf_fix_old_label_size() will fix it.
3120 */
3121 rf_component_label_numblocks(clabel) > 0) {
3122 /*
3123 * label looks reasonable enough...
3124 * let's make sure it has no old garbage.
3125 */
3126 if (numsecs)
3127 rf_fix_old_label_size(clabel, numsecs);
3128 return(1);
3129 }
3130 return(0);
3131 }
3132
3133
3134 /*
3135 * For reasons yet unknown, some old component labels have garbage in
3136 * the newer numBlocksHi region, and this causes lossage. Since those
3137 * disks will also have numsecs set to less than 32 bits of sectors,
3138 * we can determine when this corruption has occurred, and fix it.
3139 *
3140 * The exact same problem, with the same unknown reason, happens to
3141 * the partitionSizeHi member as well.
3142 */
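/*
 * Example: for a component whose disk has fewer than 2^32 sectors, a
 * garbage numBlocksHi of, say, 1 would inflate the decoded size by
 * 2^32 sectors; since such a disk can never legitimately need the
 * high word, zeroing it here is always safe.
 */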
3143 static void
3144 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3145 {
3146
3147 if (numsecs < ((uint64_t)1 << 32)) {
3148 if (clabel->numBlocksHi) {
3149 printf("WARNING: total sectors < 32 bits, yet "
3150 "numBlocksHi set\n"
3151 "WARNING: resetting numBlocksHi to zero.\n");
3152 clabel->numBlocksHi = 0;
3153 }
3154
3155 if (clabel->partitionSizeHi) {
3156 printf("WARNING: total sectors < 32 bits, yet "
3157 "partitionSizeHi set\n"
3158 "WARNING: resetting partitionSizeHi to zero.\n");
3159 clabel->partitionSizeHi = 0;
3160 }
3161 }
3162 }
3163
3164
3165 #ifdef DEBUG
3166 void
3167 rf_print_component_label(RF_ComponentLabel_t *clabel)
3168 {
3169 uint64_t numBlocks;
3170 static const char *rp[] = {
3171 "No", "Force", "Soft", "*invalid*"
3172 };
3173
3174
3175 numBlocks = rf_component_label_numblocks(clabel);
3176
3177 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3178 clabel->row, clabel->column,
3179 clabel->num_rows, clabel->num_columns);
3180 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3181 clabel->version, clabel->serial_number,
3182 clabel->mod_counter);
3183 printf(" Clean: %s Status: %d\n",
3184 clabel->clean ? "Yes" : "No", clabel->status);
3185 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3186 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3187 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3188 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3189 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3190 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3191 printf(" Last configured as: raid%d\n", clabel->last_unit);
3192 #if 0
3193 printf(" Config order: %d\n", clabel->config_order);
3194 #endif
3195
3196 }
3197 #endif
3198
3199 RF_ConfigSet_t *
3200 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3201 {
3202 RF_AutoConfig_t *ac;
3203 RF_ConfigSet_t *config_sets;
3204 RF_ConfigSet_t *cset;
3205 RF_AutoConfig_t *ac_next;
3206
3207
3208 config_sets = NULL;
3209
3210 /* Go through the AutoConfig list, and figure out which components
3211 belong to what sets. */
3212 ac = ac_list;
3213 while(ac!=NULL) {
3214 /* we're going to putz with ac->next, so save it here
3215 for use at the end of the loop */
3216 ac_next = ac->next;
3217
3218 if (config_sets == NULL) {
3219 /* will need at least this one... */
3220 config_sets = malloc(sizeof(RF_ConfigSet_t),
3221 M_RAIDFRAME, M_WAITOK);
3222 /* this one is easy :) */
3223 config_sets->ac = ac;
3224 config_sets->next = NULL;
3225 config_sets->rootable = 0;
3226 ac->next = NULL;
3227 } else {
3228 /* which set does this component fit into? */
3229 cset = config_sets;
3230 while(cset!=NULL) {
3231 if (rf_does_it_fit(cset, ac)) {
3232 /* looks like it matches... */
3233 ac->next = cset->ac;
3234 cset->ac = ac;
3235 break;
3236 }
3237 cset = cset->next;
3238 }
3239 if (cset==NULL) {
3240 /* didn't find a match above... new set..*/
3241 cset = malloc(sizeof(RF_ConfigSet_t),
3242 M_RAIDFRAME, M_WAITOK);
3243 cset->ac = ac;
3244 ac->next = NULL;
3245 cset->next = config_sets;
3246 cset->rootable = 0;
3247 config_sets = cset;
3248 }
3249 }
3250 ac = ac_next;
3251 }
3252
3253
3254 return(config_sets);
3255 }
3256
3257 static int
3258 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3259 {
3260 RF_ComponentLabel_t *clabel1, *clabel2;
3261
3262 /* If this one matches the *first* one in the set, that's good
3263 enough, since the other members of the set would have been
3264 through here too... */
3265 /* note that we are not checking partitionSize here..
3266
3267 Note that we are also not checking the mod_counters here.
3268 If everything else matches except the mod_counter, that's
3269 good enough for this test. We will deal with the mod_counters
3270 a little later in the autoconfiguration process.
3271
3272 (clabel1->mod_counter == clabel2->mod_counter) &&
3273
3274 The reason we don't check for this is that failed disks
3275 will have lower modification counts. If those disks are
3276 not added to the set they used to belong to, then they will
3277 form their own set, which may result in 2 different sets,
3278 for example, competing to be configured at raid0, and
3279 perhaps competing to be the root filesystem set. If the
3280 wrong ones get configured, or both attempt to become /,
	   weird behaviour and/or serious lossage will occur. Thus we
3282 need to bring them into the fold here, and kick them out at
3283 a later point.
3284
3285 */
3286
3287 clabel1 = cset->ac->clabel;
3288 clabel2 = ac->clabel;
3289 if ((clabel1->version == clabel2->version) &&
3290 (clabel1->serial_number == clabel2->serial_number) &&
3291 (clabel1->num_rows == clabel2->num_rows) &&
3292 (clabel1->num_columns == clabel2->num_columns) &&
3293 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3294 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3295 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3296 (clabel1->parityConfig == clabel2->parityConfig) &&
3297 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3298 (clabel1->blockSize == clabel2->blockSize) &&
3299 rf_component_label_numblocks(clabel1) ==
3300 rf_component_label_numblocks(clabel2) &&
3301 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3302 (clabel1->root_partition == clabel2->root_partition) &&
3303 (clabel1->last_unit == clabel2->last_unit) &&
3304 (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
3306 } else {
3307 /* it's not consistent with somebody in the set..
3308 punt */
3309 return(0);
3310 }
3311 /* all was fine.. it must fit... */
3312 return(1);
3313 }
3314
3315 int
3316 rf_have_enough_components(RF_ConfigSet_t *cset)
3317 {
3318 RF_AutoConfig_t *ac;
3319 RF_AutoConfig_t *auto_config;
3320 RF_ComponentLabel_t *clabel;
3321 int c;
3322 int num_cols;
3323 int num_missing;
3324 int mod_counter;
3325 int mod_counter_found;
3326 int even_pair_failed;
3327 char parity_type;
3328
3329
3330 /* check to see that we have enough 'live' components
3331 of this set. If so, we can configure it if necessary */
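	/*
	 * For RAID 1 (parityConfig '1') the components pair up as
	 * (0,1), (2,3), ...; the loop below only gives up when both
	 * members of some pair are missing.  E.g. with 4 columns,
	 * losing components 0 and 2 is survivable, losing 0 and 1
	 * is not.
	 */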
3332
3333 num_cols = cset->ac->clabel->num_columns;
3334 parity_type = cset->ac->clabel->parityConfig;
3335
3336 /* XXX Check for duplicate components!?!?!? */
3337
3338 /* Determine what the mod_counter is supposed to be for this set. */
3339
3340 mod_counter_found = 0;
3341 mod_counter = 0;
3342 ac = cset->ac;
3343 while(ac!=NULL) {
3344 if (mod_counter_found==0) {
3345 mod_counter = ac->clabel->mod_counter;
3346 mod_counter_found = 1;
3347 } else {
3348 if (ac->clabel->mod_counter > mod_counter) {
3349 mod_counter = ac->clabel->mod_counter;
3350 }
3351 }
3352 ac = ac->next;
3353 }
3354
3355 num_missing = 0;
3356 auto_config = cset->ac;
3357
3358 even_pair_failed = 0;
3359 for(c=0; c<num_cols; c++) {
3360 ac = auto_config;
3361 while(ac!=NULL) {
3362 if ((ac->clabel->column == c) &&
3363 (ac->clabel->mod_counter == mod_counter)) {
3364 /* it's this one... */
3365 #ifdef DEBUG
3366 printf("Found: %s at %d\n",
3367 ac->devname,c);
3368 #endif
3369 break;
3370 }
3371 ac=ac->next;
3372 }
3373 if (ac==NULL) {
3374 /* Didn't find one here! */
3375 /* special case for RAID 1, especially
3376 where there are more than 2
3377 components (where RAIDframe treats
3378 things a little differently :( ) */
3379 if (parity_type == '1') {
3380 if (c%2 == 0) { /* even component */
3381 even_pair_failed = 1;
3382 } else { /* odd component. If
3383 we're failed, and
3384 so is the even
3385 component, it's
3386 "Good Night, Charlie" */
3387 if (even_pair_failed == 1) {
3388 return(0);
3389 }
3390 }
3391 } else {
3392 /* normal accounting */
3393 num_missing++;
3394 }
3395 }
3396 if ((parity_type == '1') && (c%2 == 1)) {
3397 /* Just did an even component, and we didn't
3398 bail.. reset the even_pair_failed flag,
3399 and go on to the next component.... */
3400 even_pair_failed = 0;
3401 }
3402 }
3403
3404 clabel = cset->ac->clabel;
3405
3406 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3407 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3408 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3409 /* XXX this needs to be made *much* more general */
3410 /* Too many failures */
3411 return(0);
3412 }
3413 /* otherwise, all is well, and we've got enough to take a kick
3414 at autoconfiguring this set */
3415 return(1);
3416 }
3417
3418 void
3419 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3420 RF_Raid_t *raidPtr)
3421 {
3422 RF_ComponentLabel_t *clabel;
3423 int i;
3424
3425 clabel = ac->clabel;
3426
3427 /* 1. Fill in the common stuff */
3428 config->numCol = clabel->num_columns;
3429 config->numSpare = 0; /* XXX should this be set here? */
3430 config->sectPerSU = clabel->sectPerSU;
3431 config->SUsPerPU = clabel->SUsPerPU;
3432 config->SUsPerRU = clabel->SUsPerRU;
3433 config->parityConfig = clabel->parityConfig;
3434 /* XXX... */
3435 strcpy(config->diskQueueType,"fifo");
3436 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3437 config->layoutSpecificSize = 0; /* XXX ?? */
3438
3439 while(ac!=NULL) {
3440 /* row/col values will be in range due to the checks
3441 in reasonable_label() */
3442 strcpy(config->devnames[0][ac->clabel->column],
3443 ac->devname);
3444 ac = ac->next;
3445 }
3446
3447 for(i=0;i<RF_MAXDBGV;i++) {
3448 config->debugVars[i][0] = 0;
3449 }
3450 }
3451
3452 int
3453 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3454 {
3455 RF_ComponentLabel_t *clabel;
3456 int column;
3457 int sparecol;
3458
3459 raidPtr->autoconfigure = new_value;
3460
	for (column = 0; column < raidPtr->numCol; column++) {
3462 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3463 clabel = raidget_component_label(raidPtr, column);
3464 clabel->autoconfigure = new_value;
3465 raidflush_component_label(raidPtr, column);
3466 }
3467 }
	for (column = 0; column < raidPtr->numSpare; column++) {
3469 sparecol = raidPtr->numCol + column;
3470 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3471 clabel = raidget_component_label(raidPtr, sparecol);
3472 clabel->autoconfigure = new_value;
3473 raidflush_component_label(raidPtr, sparecol);
3474 }
3475 }
3476 return(new_value);
3477 }
3478
3479 int
3480 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3481 {
3482 RF_ComponentLabel_t *clabel;
3483 int column;
3484 int sparecol;
3485
3486 raidPtr->root_partition = new_value;
	for (column = 0; column < raidPtr->numCol; column++) {
3488 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3489 clabel = raidget_component_label(raidPtr, column);
3490 clabel->root_partition = new_value;
3491 raidflush_component_label(raidPtr, column);
3492 }
3493 }
	for (column = 0; column < raidPtr->numSpare; column++) {
3495 sparecol = raidPtr->numCol + column;
3496 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3497 clabel = raidget_component_label(raidPtr, sparecol);
3498 clabel->root_partition = new_value;
3499 raidflush_component_label(raidPtr, sparecol);
3500 }
3501 }
3502 return(new_value);
3503 }
3504
3505 void
3506 rf_release_all_vps(RF_ConfigSet_t *cset)
3507 {
3508 RF_AutoConfig_t *ac;
3509
3510 ac = cset->ac;
3511 while(ac!=NULL) {
3512 /* Close the vp, and give it back */
3513 if (ac->vp) {
3514 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3515 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3516 vput(ac->vp);
3517 ac->vp = NULL;
3518 }
3519 ac = ac->next;
3520 }
3521 }
3522
3523
3524 void
3525 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3526 {
3527 RF_AutoConfig_t *ac;
3528 RF_AutoConfig_t *next_ac;
3529
3530 ac = cset->ac;
3531 while(ac!=NULL) {
3532 next_ac = ac->next;
3533 /* nuke the label */
3534 free(ac->clabel, M_RAIDFRAME);
3535 /* cleanup the config structure */
3536 free(ac, M_RAIDFRAME);
3537 /* "next.." */
3538 ac = next_ac;
3539 }
3540 /* and, finally, nuke the config set */
3541 free(cset, M_RAIDFRAME);
3542 }
3543
3544
3545 void
3546 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3547 {
3548 /* current version number */
3549 clabel->version = RF_COMPONENT_LABEL_VERSION;
3550 clabel->serial_number = raidPtr->serial_number;
3551 clabel->mod_counter = raidPtr->mod_counter;
3552
3553 clabel->num_rows = 1;
3554 clabel->num_columns = raidPtr->numCol;
3555 clabel->clean = RF_RAID_DIRTY; /* not clean */
3556 clabel->status = rf_ds_optimal; /* "It's good!" */
3557
3558 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3559 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3560 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3561
3562 clabel->blockSize = raidPtr->bytesPerSector;
3563 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3564
3565 /* XXX not portable */
3566 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3567 clabel->maxOutstanding = raidPtr->maxOutstanding;
3568 clabel->autoconfigure = raidPtr->autoconfigure;
3569 clabel->root_partition = raidPtr->root_partition;
3570 clabel->last_unit = raidPtr->raidid;
3571 clabel->config_order = raidPtr->config_order;
3572
3573 #ifndef RF_NO_PARITY_MAP
3574 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3575 #endif
3576 }
3577
3578 struct raid_softc *
3579 rf_auto_config_set(RF_ConfigSet_t *cset)
3580 {
3581 RF_Raid_t *raidPtr;
3582 RF_Config_t *config;
3583 int raidID;
3584 struct raid_softc *sc;
3585
3586 #ifdef DEBUG
3587 printf("RAID autoconfigure\n");
3588 #endif
3589
3590 /* 1. Create a config structure */
3591 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3592
	/*
	 * 2. Figure out what RAID ID this one is supposed to live at.
	 * See if we can get the same RAID dev that it was configured
	 * on last time..
	 */
3598
3599 raidID = cset->ac->clabel->last_unit;
3600 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3601 sc = raidget(++raidID, false))
3602 continue;
3603 #ifdef DEBUG
3604 printf("Configuring raid%d:\n",raidID);
3605 #endif
3606
3607 if (sc == NULL)
3608 sc = raidget(raidID, true);
3609 raidPtr = &sc->sc_r;
3610
3611 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3612 raidPtr->softc = sc;
3613 raidPtr->raidid = raidID;
3614 raidPtr->openings = RAIDOUTSTANDING;
3615
3616 /* 3. Build the configuration structure */
3617 rf_create_configuration(cset->ac, config, raidPtr);
3618
3619 /* 4. Do the configuration */
3620 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3621 raidinit(sc);
3622
3623 rf_markalldirty(raidPtr);
3624 raidPtr->autoconfigure = 1; /* XXX do this here? */
3625 switch (cset->ac->clabel->root_partition) {
3626 case 1: /* Force Root */
3627 case 2: /* Soft Root: root when boot partition part of raid */
3628 /*
3629 * everything configured just fine. Make a note
3630 * that this set is eligible to be root,
3631 * or forced to be root
3632 */
3633 cset->rootable = cset->ac->clabel->root_partition;
3634 /* XXX do this here? */
3635 raidPtr->root_partition = cset->rootable;
3636 break;
3637 default:
3638 break;
3639 }
3640 } else {
3641 raidput(sc);
3642 sc = NULL;
3643 }
3644
3645 /* 5. Cleanup */
3646 free(config, M_RAIDFRAME);
3647 return sc;
3648 }
3649
3650 void
3651 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3652 size_t xmin, size_t xmax)
3653 {
3654 int error;
3655
3656 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3657 pool_sethiwat(p, xmax);
3658 if ((error = pool_prime(p, xmin)) != 0)
3659 panic("%s: failed to prime pool: %d", __func__, error);
3660 pool_setlowat(p, xmin);
3661 }
3662
3663 /*
 * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
3665 * to see if there is IO pending and if that IO could possibly be done
3666 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3667 * otherwise.
3668 *
3669 */
3670 int
3671 rf_buf_queue_check(RF_Raid_t *raidPtr)
3672 {
3673 struct raid_softc *rs;
3674 struct dk_softc *dksc;
3675
3676 rs = raidPtr->softc;
3677 dksc = &rs->sc_dksc;
3678
3679 if ((rs->sc_flags & RAIDF_INITED) == 0)
3680 return 1;
3681
3682 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3683 /* there is work to do */
3684 return 0;
3685 }
3686 /* default is nothing to do */
3687 return 1;
3688 }
3689
3690 int
3691 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3692 {
3693 uint64_t numsecs;
3694 unsigned secsize;
3695 int error;
3696
3697 error = getdisksize(vp, &numsecs, &secsize);
3698 if (error == 0) {
3699 diskPtr->blockSize = secsize;
3700 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3701 diskPtr->partitionSize = numsecs;
3702 return 0;
3703 }
3704 return error;
3705 }
3706
3707 static int
3708 raid_match(device_t self, cfdata_t cfdata, void *aux)
3709 {
3710 return 1;
3711 }
3712
3713 static void
3714 raid_attach(device_t parent, device_t self, void *aux)
3715 {
3716 }
3717
3718
3719 static int
3720 raid_detach(device_t self, int flags)
3721 {
3722 int error;
3723 struct raid_softc *rs = raidsoftc(self);
3724
3725 if (rs == NULL)
3726 return ENXIO;
3727
3728 if ((error = raidlock(rs)) != 0)
3729 return (error);
3730
3731 error = raid_detach_unlocked(rs);
3732
3733 raidunlock(rs);
3734
3735 /* XXX raid can be referenced here */
3736
3737 if (error)
3738 return error;
3739
3740 /* Free the softc */
3741 raidput(rs);
3742
3743 return 0;
3744 }
3745
3746 static void
3747 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3748 {
3749 struct dk_softc *dksc = &rs->sc_dksc;
3750 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3751
3752 memset(dg, 0, sizeof(*dg));
3753
3754 dg->dg_secperunit = raidPtr->totalSectors;
3755 dg->dg_secsize = raidPtr->bytesPerSector;
3756 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3757 dg->dg_ntracks = 4 * raidPtr->numCol;
3758
3759 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3760 }
3761
3762 /*
3763 * Get cache info for all the components (including spares).
3764 * Returns intersection of all the cache flags of all disks, or first
3765 * error if any encountered.
3766 * XXXfua feature flags can change as spares are added - lock down somehow
3767 */
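/*
 * Given the intersection semantics noted above, DKCACHE_COMBINE yields
 * conservative reporting: if even one live component reports its write
 * cache disabled, the whole set is reported that way.
 */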
3768 static int
3769 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3770 {
3771 int c;
3772 int error;
3773 int dkwhole = 0, dkpart;
3774
3775 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3776 /*
3777 * Check any non-dead disk, even when currently being
3778 * reconstructed.
3779 */
3780 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
3781 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3782 DIOCGCACHE, &dkpart, FREAD, NOCRED);
3783 if (error) {
3784 if (error != ENODEV) {
3785 printf("raid%d: get cache for component %s failed\n",
3786 raidPtr->raidid,
3787 raidPtr->Disks[c].devname);
3788 }
3789
3790 return error;
3791 }
3792
3793 if (c == 0)
3794 dkwhole = dkpart;
3795 else
3796 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3797 }
3798 }
3799
3800 *data = dkwhole;
3801
3802 return 0;
3803 }
3804
3805 /*
3806 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3807 * We end up returning whatever error was returned by the first cache flush
3808 * that fails.
3809 */
3810
3811 int
3812 rf_sync_component_caches(RF_Raid_t *raidPtr)
3813 {
3814 int c, sparecol;
	int e, error;
3816 int force = 1;
3817
3818 error = 0;
3819 for (c = 0; c < raidPtr->numCol; c++) {
3820 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3821 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3822 &force, FWRITE, NOCRED);
3823 if (e) {
3824 if (e != ENODEV)
3825 printf("raid%d: cache flush to component %s failed.\n",
3826 raidPtr->raidid, raidPtr->Disks[c].devname);
3827 if (error == 0) {
3828 error = e;
3829 }
3830 }
3831 }
3832 }
3833
	for (c = 0; c < raidPtr->numSpare; c++) {
3835 sparecol = raidPtr->numCol + c;
3836 /* Need to ensure that the reconstruct actually completed! */
3837 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3838 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3839 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3840 if (e) {
3841 if (e != ENODEV)
3842 printf("raid%d: cache flush to component %s failed.\n",
3843 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3844 if (error == 0) {
3845 error = e;
3846 }
3847 }
3848 }
3849 }
3850 return error;
3851 }
3852
3853 /* Fill in info with the current status */
3854 void
3855 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3856 {
3857
3858 memset(info, 0, sizeof(*info));
3859
3860 if (raidPtr->status != rf_rs_reconstructing) {
3861 info->total = 100;
3862 info->completed = 100;
3863 } else {
3864 info->total = raidPtr->reconControl->numRUsTotal;
3865 info->completed = raidPtr->reconControl->numRUsComplete;
3866 }
3867 info->remaining = info->total - info->completed;
3868 }
3869
3870 /* Fill in info with the current status */
3871 void
3872 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3873 {
3874
3875 memset(info, 0, sizeof(*info));
3876
3877 if (raidPtr->parity_rewrite_in_progress == 1) {
3878 info->total = raidPtr->Layout.numStripe;
3879 info->completed = raidPtr->parity_rewrite_stripes_done;
3880 } else {
3881 info->completed = 100;
3882 info->total = 100;
3883 }
3884 info->remaining = info->total - info->completed;
3885 }
3886
3887 /* Fill in info with the current status */
3888 void
3889 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3890 {
3891
3892 memset(info, 0, sizeof(*info));
3893
3894 if (raidPtr->copyback_in_progress == 1) {
3895 info->total = raidPtr->Layout.numStripe;
3896 info->completed = raidPtr->copyback_stripes_done;
3897 info->remaining = info->total - info->completed;
3898 } else {
3899 info->remaining = 0;
3900 info->completed = 100;
3901 info->total = 100;
3902 }
3903 }
3904
3905 /* Fill in config with the current info */
3906 int
3907 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3908 {
3909 int d, i, j;
3910
3911 if (!raidPtr->valid)
3912 return (ENODEV);
3913 config->cols = raidPtr->numCol;
3914 config->ndevs = raidPtr->numCol;
3915 if (config->ndevs >= RF_MAX_DISKS)
3916 return (ENOMEM);
3917 config->nspares = raidPtr->numSpare;
3918 if (config->nspares >= RF_MAX_DISKS)
3919 return (ENOMEM);
3920 config->maxqdepth = raidPtr->maxQueueDepth;
3921 d = 0;
3922 for (j = 0; j < config->cols; j++) {
3923 config->devs[d] = raidPtr->Disks[j];
3924 d++;
3925 }
3926 for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3927 config->spares[i] = raidPtr->Disks[j];
3928 if (config->spares[i].status == rf_ds_rebuilding_spare) {
3929 /* XXX: raidctl(8) expects to see this as a used spare */
3930 config->spares[i].status = rf_ds_used_spare;
3931 }
3932 }
3933 return 0;
3934 }
3935
3936 int
3937 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3938 {
3939 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3940 RF_ComponentLabel_t *raid_clabel;
3941 int column = clabel->column;
3942
3943 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3944 return EINVAL;
3945 raid_clabel = raidget_component_label(raidPtr, column);
3946 memcpy(clabel, raid_clabel, sizeof *clabel);
3947
3948 return 0;
3949 }
3950
3951 /*
3952 * Module interface
3953 */
3954
3955 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
3956
3957 #ifdef _MODULE
3958 CFDRIVER_DECL(raid, DV_DISK, NULL);
3959 #endif
3960
3961 static int raid_modcmd(modcmd_t, void *);
3962 static int raid_modcmd_init(void);
3963 static int raid_modcmd_fini(void);
3964
3965 static int
3966 raid_modcmd(modcmd_t cmd, void *data)
3967 {
3968 int error;
3969
3970 error = 0;
3971 switch (cmd) {
3972 case MODULE_CMD_INIT:
3973 error = raid_modcmd_init();
3974 break;
3975 case MODULE_CMD_FINI:
3976 error = raid_modcmd_fini();
3977 break;
3978 default:
3979 error = ENOTTY;
3980 break;
3981 }
3982 return error;
3983 }
3984
3985 static int
3986 raid_modcmd_init(void)
3987 {
3988 int error;
3989 int bmajor, cmajor;
3990
3991 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
3992 mutex_enter(&raid_lock);
3993 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3994 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
3995 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
3996 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
3997
3998 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
3999 #endif
4000
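	/*
	 * Attach order: devsw first, then (when built as a module) the
	 * cfdriver, then the cfattach.  Each error path below unwinds
	 * whatever was already attached, in reverse order.
	 */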
4001 bmajor = cmajor = -1;
4002 error = devsw_attach("raid", &raid_bdevsw, &bmajor,
4003 &raid_cdevsw, &cmajor);
4004 if (error != 0 && error != EEXIST) {
4005 aprint_error("%s: devsw_attach failed %d\n", __func__, error);
4006 mutex_exit(&raid_lock);
4007 return error;
4008 }
4009 #ifdef _MODULE
4010 error = config_cfdriver_attach(&raid_cd);
4011 if (error != 0) {
4012 aprint_error("%s: config_cfdriver_attach failed %d\n",
4013 __func__, error);
4014 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4015 mutex_exit(&raid_lock);
4016 return error;
4017 }
4018 #endif
4019 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4020 if (error != 0) {
4021 aprint_error("%s: config_cfattach_attach failed %d\n",
4022 __func__, error);
4023 #ifdef _MODULE
4024 config_cfdriver_detach(&raid_cd);
4025 #endif
4026 devsw_detach(&raid_bdevsw, &raid_cdevsw);
4027 mutex_exit(&raid_lock);
4028 return error;
4029 }
4030
4031 raidautoconfigdone = false;
4032
4033 mutex_exit(&raid_lock);
4034
4035 if (error == 0) {
4036 if (rf_BootRaidframe(true) == 0)
4037 aprint_verbose("Kernelized RAIDframe activated\n");
4038 else
4039 panic("Serious error activating RAID!!");
4040 }
4041
4042 /*
4043 * Register a finalizer which will be used to auto-config RAID
4044 * sets once all real hardware devices have been found.
4045 */
4046 error = config_finalize_register(NULL, rf_autoconfig);
4047 if (error != 0) {
4048 aprint_error("WARNING: unable to register RAIDframe "
4049 "finalizer\n");
4050 error = 0;
4051 }
4052
4053 return error;
4054 }
4055
4056 static int
4057 raid_modcmd_fini(void)
4058 {
4059 int error;
4060
4061 mutex_enter(&raid_lock);
4062
4063 /* Don't allow unload if raid device(s) exist. */
4064 if (!LIST_EMPTY(&raids)) {
4065 mutex_exit(&raid_lock);
4066 return EBUSY;
4067 }
4068
4069 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
4070 if (error != 0) {
4071 aprint_error("%s: cannot detach cfattach\n",__func__);
4072 mutex_exit(&raid_lock);
4073 return error;
4074 }
4075 #ifdef _MODULE
4076 error = config_cfdriver_detach(&raid_cd);
4077 if (error != 0) {
4078 aprint_error("%s: cannot detach cfdriver\n",__func__);
4079 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4080 mutex_exit(&raid_lock);
4081 return error;
4082 }
4083 #endif
4084 error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
4085 if (error != 0) {
4086 aprint_error("%s: cannot detach devsw\n",__func__);
4087 #ifdef _MODULE
4088 config_cfdriver_attach(&raid_cd);
4089 #endif
4090 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
4091 mutex_exit(&raid_lock);
4092 return error;
4093 }
4094 rf_BootRaidframe(false);
4095 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
4096 rf_destroy_mutex2(rf_sparet_wait_mutex);
4097 rf_destroy_cond2(rf_sparet_wait_cv);
4098 rf_destroy_cond2(rf_sparet_resp_cv);
4099 #endif
4100 mutex_exit(&raid_lock);
4101 mutex_destroy(&raid_lock);
4102
4103 return error;
4104 }
4105