rf_netbsdkintf.c revision 1.316.2.4 1 /* $NetBSD: rf_netbsdkintf.c,v 1.316.2.4 2015/12/27 12:09:58 skrll Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.316.2.4 2015/12/27 12:09:58 skrll Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #include "ioconf.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 struct raid_softc;
183 static void raidinit(struct raid_softc *);
184
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 static dev_type_open(raidopen);
201 static dev_type_close(raidclose);
202 static dev_type_read(raidread);
203 static dev_type_write(raidwrite);
204 static dev_type_ioctl(raidioctl);
205 static dev_type_strategy(raidstrategy);
206 static dev_type_dump(raiddump);
207 static dev_type_size(raidsize);
208
/*
 * Block-device switch for /dev/raid* block nodes.  All entry points
 * funnel into the raid* routines declared above; D_DISK marks this
 * as a disk-style device for the buffer cache / disklabel code.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/*
 * Character-device switch for the raw /dev/rraid* nodes.  Reads and
 * writes go through physio() (see raidread/raidwrite); unsupported
 * entries use the standard no*() stubs.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the disk(9) subsystem via disk_attach()/dk_attach. */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
239
/*
 * Per-unit software state for a RAIDframe device.  One of these is
 * allocated per raid unit (see raidcreate()) and linked onto the
 * global `raids' list, which is protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle (NULL until attached) */
	int     sc_unit;	/* raid unit number (index into /dev/raidN) */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* the RAIDframe descriptor proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global `raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
261
262 #define raidunit(x) DISKUNIT(x)
263
264 extern struct cfdriver raid_cd;
265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
266 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
267 DVF_DETACH_SHUTDOWN);
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static int raid_detach_unlocked(struct raid_softc *);
304
305 static void rf_markalldirty(RF_Raid_t *);
306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(device_t);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
327
328 /*
329 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
330 * Note that this is overridden by having RAID_AUTOCONFIG as an option
331 * in the kernel config file.
332 */
333 #ifdef RAID_AUTOCONFIG
334 int raidautoconfig = 1;
335 #else
336 int raidautoconfig = 0;
337 #endif
338 static bool raidautoconfigdone = false;
339
340 struct RF_Pools_s rf_pools;
341
342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
343 static kmutex_t raid_lock;
344
345 static struct raid_softc *
346 raidcreate(int unit) {
347 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
348 if (sc == NULL) {
349 #ifdef DIAGNOSTIC
350 printf("%s: out of memory\n", __func__);
351 #endif
352 return NULL;
353 }
354 sc->sc_unit = unit;
355 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
356 cv_init(&sc->sc_cv, "raidunit");
357 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
358 return sc;
359 }
360
361 static void
362 raiddestroy(struct raid_softc *sc) {
363 cv_destroy(&sc->sc_cv);
364 mutex_destroy(&sc->sc_mutex);
365 bufq_free(sc->buf_queue);
366 kmem_free(sc, sizeof(*sc));
367 }
368
369 static struct raid_softc *
370 raidget(int unit, bool create) {
371 struct raid_softc *sc;
372 if (unit < 0) {
373 #ifdef DIAGNOSTIC
374 panic("%s: unit %d!", __func__, unit);
375 #endif
376 return NULL;
377 }
378 mutex_enter(&raid_lock);
379 LIST_FOREACH(sc, &raids, sc_link) {
380 if (sc->sc_unit == unit) {
381 mutex_exit(&raid_lock);
382 return sc;
383 }
384 }
385 mutex_exit(&raid_lock);
386 if (!create)
387 return NULL;
388 if ((sc = raidcreate(unit)) == NULL)
389 return NULL;
390 mutex_enter(&raid_lock);
391 LIST_INSERT_HEAD(&raids, sc, sc_link);
392 mutex_exit(&raid_lock);
393 return sc;
394 }
395
396 static void
397 raidput(struct raid_softc *sc) {
398 mutex_enter(&raid_lock);
399 LIST_REMOVE(sc, sc_link);
400 mutex_exit(&raid_lock);
401 raiddestroy(sc);
402 }
403
/*
 * Historical pseudo-device attach hook.  Intentionally empty:
 * device attachment and the associated initialization now happen
 * during module initialization instead.
 */
void
raidattach(int num)
{
	/* Nothing to do here any more. */
}
413
/*
 * Locate all RAID components present at boot and configure any
 * complete, autoconfigurable sets.  Guarded so that it runs at most
 * once (raidautoconfigdone) and only when autoconfiguration is
 * enabled (raidautoconfig).  Returns 1 if a scan was performed,
 * 0 if it was skipped.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
451
452 static int
453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
454 const char *bootname = device_xname(bdv);
455 size_t len = strlen(bootname);
456
457 for (int col = 0; col < r->numCol; col++) {
458 const char *devname = r->Disks[col].devname;
459 devname += sizeof("/dev/") - 1;
460 if (strncmp(devname, "dk", 2) == 0) {
461 const char *parent =
462 dkwedge_get_parent_name(r->Disks[col].dev);
463 if (parent != NULL)
464 devname = parent;
465 }
466 if (strncmp(devname, bootname, len) == 0) {
467 struct raid_softc *sc = r->softc;
468 aprint_debug("raid%d includes boot device %s\n",
469 sc->sc_unit, devname);
470 return 1;
471 }
472 }
473 return 0;
474 }
475
/*
 * Walk the list of discovered configuration sets, configure the ones
 * that are complete and marked for autoconfiguration, and then try to
 * determine whether one of the configured sets should become the root
 * device.  Sets that are not configured (or fail to configure) have
 * their resources released.  May override the global booted_device /
 * booted_partition, or set RB_ASKNAME when the root is ambiguous.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* Remember the successor: the set is torn down below. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/*
			 * NB: cset is NULL here; sizeof() is unevaluated,
			 * so using it for the buffer size is safe.
			 */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
586
587 static int
588 raidsize(dev_t dev)
589 {
590 struct raid_softc *rs;
591 struct disklabel *lp;
592 int part, unit, omask, size;
593
594 unit = raidunit(dev);
595 if ((rs = raidget(unit, false)) == NULL)
596 return -1;
597 if ((rs->sc_flags & RAIDF_INITED) == 0)
598 return (-1);
599
600 part = DISKPART(dev);
601 omask = rs->sc_dkdev.dk_openmask & (1 << part);
602 lp = rs->sc_dkdev.dk_label;
603
604 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
605 return (-1);
606
607 if (lp->d_partitions[part].p_fstype != FS_SWAP)
608 size = -1;
609 else
610 size = lp->d_partitions[part].p_size *
611 (lp->d_secsize / DEV_BSIZE);
612
613 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
614 return (-1);
615
616 return (size);
617
618 }
619
/*
 * Crash-dump entry point.  Only RAID 1 sets (1 data + 1 parity
 * column) are supported.  Picks a single live component (or used
 * spare) to dump to, then forwards the dump to that component's
 * block device, offsetting past the partition start and the
 * RAIDframe-reserved sectors.  Returns 0 or an errno.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps are always in whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse dumps that would run past the end of the set */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
755
/* ARGSUSED */
/*
 * Open entry point.  Creates the softc on first reference (raidget
 * with create=true), serializes against configuration changes via
 * raidlock(), tracks per-partition opens in the disk(9) open masks,
 * and on the very first open of a configured set marks all components
 * dirty so an unclean shutdown can later be detected.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured, wedge-less unit: (re)read the label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
841
/* ARGSUSED */
/*
 * Close entry point.  Clears the caller's bit in the appropriate
 * open mask; on the last close of a configured unit the component
 * labels are updated (final clean marks), and on the last close of a
 * unit flagged RAIDF_SHUTDOWN the device is actually detached/freed.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);
	}
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
		/*
		 * Detach this raid unit
		 */
		cfdata_t cf = NULL;
		int retcode = 0;

		if (rs->sc_dev != NULL) {
			cf = device_cfdata(rs->sc_dev);

			/* drop the unit lock before config_detach() */
			raidunlock(rs);
			retcode = config_detach(rs->sc_dev, DETACH_QUIET);
			if (retcode == 0)
				/* free the pseudo device attach bits */
				free(cf, M_RAIDFRAME);
		} else {
			/* never attached: just drop the softc */
			raidput(rs);
		}
		return retcode;
	}

	raidunlock(rs);
	return (0);
}
907
/*
 * Block I/O entry point.  Validates the request against unit state
 * and the disklabel, then queues the buffer on the per-unit bufq and
 * wakes the RAIDframe I/O thread via iodone_cv; completion is
 * signalled later through biodone() by the RAIDframe machinery.
 * Rejected requests are completed immediately with b_error set.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* zero-length transfers succeed trivially */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sectors) to DEV_BSIZE units. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
			    (int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
978
979 /* ARGSUSED */
980 static int
981 raidread(dev_t dev, struct uio *uio, int flags)
982 {
983 int unit = raidunit(dev);
984 struct raid_softc *rs;
985
986 if ((rs = raidget(unit, false)) == NULL)
987 return ENXIO;
988
989 if ((rs->sc_flags & RAIDF_INITED) == 0)
990 return (ENXIO);
991
992 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
993
994 }
995
996 /* ARGSUSED */
997 static int
998 raidwrite(dev_t dev, struct uio *uio, int flags)
999 {
1000 int unit = raidunit(dev);
1001 struct raid_softc *rs;
1002
1003 if ((rs = raidget(unit, false)) == NULL)
1004 return ENXIO;
1005
1006 if ((rs->sc_flags & RAIDF_INITED) == 0)
1007 return (ENXIO);
1008
1009 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1010
1011 }
1012
1013 static int
1014 raid_detach_unlocked(struct raid_softc *rs)
1015 {
1016 int error;
1017 RF_Raid_t *raidPtr;
1018
1019 raidPtr = &rs->sc_r;
1020
1021 /*
1022 * If somebody has a partition mounted, we shouldn't
1023 * shutdown.
1024 */
1025 if (rs->sc_dkdev.dk_openmask != 0)
1026 return EBUSY;
1027
1028 if ((rs->sc_flags & RAIDF_INITED) == 0)
1029 ; /* not initialized: nothing to do */
1030 else if ((error = rf_Shutdown(raidPtr)) != 0)
1031 return error;
1032 else
1033 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
1034
1035 /* Detach the disk. */
1036 dkwedge_delall(&rs->sc_dkdev);
1037 disk_detach(&rs->sc_dkdev);
1038 disk_destroy(&rs->sc_dkdev);
1039
1040 /* Free the softc */
1041 raidput(rs);
1042
1043 aprint_normal_dev(rs->sc_dev, "detached\n");
1044
1045 return 0;
1046 }
1047
/*
 * raidioctl: ioctl entry point for RAIDframe pseudo-disk devices.
 *
 * Handles the RAIDframe-specific RAIDFRAME_* commands (configure,
 * shutdown, rebuild/copyback control, status queries, component-label
 * manipulation) as well as the standard disk(4) DIOC* commands.
 * Commands not recognized by the big switch below are first offered to
 * disk_ioctl() and then to the disklabel/bufq cases at the bottom.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	/* Look up the softc; do not create one if it does not exist yet. */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPARTINFO:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		/* A (re)configure attempt cancels any pending shutdown. */
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		/* Replace the user-space pointer with the kernel copy. */
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * Refuse the shutdown if any partition other than the
		 * one issuing the ioctl is still open, or if this
		 * partition is open both block and character.
		 */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		/* NOTE(review): RF_Malloc result is not NULL-checked before
		   the copyin below — confirm RF_Malloc cannot fail here. */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		/* The temporary copy was only needed for the column. */
		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in a kernel thread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; returns success unconditionally. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The request outlives this ioctl; the recon thread frees it. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live past the data columns in the Disks array. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		   here (we return 0 regardless) — confirm intentional; the
		   REWRITEPARITY and COPYBACK cases return it. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): rf_broadcast_conf2 looks like a typo for
		   rf_broadcast_cond2 (cf. the SEND_SPARET case below);
		   harmless only while this block is #if 0'd out. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* Only the "write" variants also hit the disk. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	{
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	}

	case DIOCSSTRATEGY:
	{
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue, migrating any pending buffers. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	}

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1966
1967
1968 /* raidinit -- complete the rest of the initialization for the
1969 RAIDframe device. */
1970
1971
1972 static void
1973 raidinit(struct raid_softc *rs)
1974 {
1975 cfdata_t cf;
1976 int unit;
1977 RF_Raid_t *raidPtr = &rs->sc_r;
1978
1979 unit = raidPtr->raidid;
1980
1981
1982 /* XXX should check return code first... */
1983 rs->sc_flags |= RAIDF_INITED;
1984
1985 /* XXX doesn't check bounds. */
1986 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1987
1988 /* attach the pseudo device */
1989 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1990 cf->cf_name = raid_cd.cd_name;
1991 cf->cf_atname = raid_cd.cd_name;
1992 cf->cf_unit = unit;
1993 cf->cf_fstate = FSTATE_STAR;
1994
1995 rs->sc_dev = config_attach_pseudo(cf);
1996
1997 if (rs->sc_dev == NULL) {
1998 printf("raid%d: config_attach_pseudo failed\n",
1999 raidPtr->raidid);
2000 rs->sc_flags &= ~RAIDF_INITED;
2001 free(cf, M_RAIDFRAME);
2002 return;
2003 }
2004
2005 /* disk_attach actually creates space for the CPU disklabel, among
2006 * other things, so it's critical to call this *BEFORE* we try putzing
2007 * with disklabels. */
2008
2009 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2010 disk_attach(&rs->sc_dkdev);
2011
2012 /* XXX There may be a weird interaction here between this, and
2013 * protectedSectors, as used in RAIDframe. */
2014
2015 rs->sc_size = raidPtr->totalSectors;
2016
2017 rf_set_geometry(rs, raidPtr);
2018
2019 dkwedge_discover(&rs->sc_dkdev);
2020
2021 }
2022 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
2032 int
2033 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2034 {
2035 int retcode;
2036
2037 rf_lock_mutex2(rf_sparet_wait_mutex);
2038 req->next = rf_sparet_wait_queue;
2039 rf_sparet_wait_queue = req;
2040 rf_broadcast_cond2(rf_sparet_wait_cv);
2041
2042 /* mpsleep unlocks the mutex */
2043 while (!rf_sparet_resp_queue) {
2044 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2045 }
2046 req = rf_sparet_resp_queue;
2047 rf_sparet_resp_queue = req->next;
2048 rf_unlock_mutex2(rf_sparet_wait_mutex);
2049
2050 retcode = req->fcol;
2051 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2052 * alloc'd */
2053 return (retcode);
2054 }
2055 #endif
2056
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the lock: the label update takes it itself. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Invariant: raidPtr->mutex is held at the top of each iteration. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NB: returns with the mutex released. */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): the "1 ||" forces this debug branch on
		   unconditionally — looks like a development leftover. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run off the end of the set; the
		 * "sum < ..." comparisons catch arithmetic wraparound. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening; KernelWakeupFunc returns it on
		 * I/O completion. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2182
2183
2184
2185
2186 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2187
2188 int
2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2190 {
2191 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2192 struct buf *bp;
2193
2194 req->queue = queue;
2195 bp = req->bp;
2196
2197 switch (req->type) {
2198 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2199 /* XXX need to do something extra here.. */
2200 /* I'm leaving this in, as I've never actually seen it used,
2201 * and I'd like folks to report it... GO */
2202 printf(("WAKEUP CALLED\n"));
2203 queue->numOutstanding++;
2204
2205 bp->b_flags = 0;
2206 bp->b_private = req;
2207
2208 KernelWakeupFunc(bp);
2209 break;
2210
2211 case RF_IO_TYPE_READ:
2212 case RF_IO_TYPE_WRITE:
2213 #if RF_ACC_TRACE > 0
2214 if (req->tracerec) {
2215 RF_ETIMER_START(req->tracerec->timer);
2216 }
2217 #endif
2218 InitBP(bp, queue->rf_cinfo->ci_vp,
2219 op, queue->rf_cinfo->ci_dev,
2220 req->sectorOffset, req->numSector,
2221 req->buf, KernelWakeupFunc, (void *) req,
2222 queue->raidPtr->logBytesPerSector, req->b_proc);
2223
2224 if (rf_debugKernelAccess) {
2225 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2226 (long) bp->b_blkno));
2227 }
2228 queue->numOutstanding++;
2229 queue->last_deq_sector = req->sectorOffset;
2230 /* acc wouldn't have been let in if there were any pending
2231 * reqs at any other priority */
2232 queue->curPriority = req->priority;
2233
2234 db1_printf(("Going for %c to unit %d col %d\n",
2235 req->type, queue->raidPtr->raidid,
2236 queue->col));
2237 db1_printf(("sector %d count %d (%d bytes) %d\n",
2238 (int) req->sectorOffset, (int) req->numSector,
2239 (int) (req->numSector <<
2240 queue->raidPtr->logBytesPerSector),
2241 (int) queue->raidPtr->logBytesPerSector));
2242
2243 /*
2244 * XXX: drop lock here since this can block at
2245 * least with backing SCSI devices. Retake it
2246 * to minimize fuss with calling interfaces.
2247 */
2248
2249 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2250 bdev_strategy(bp);
2251 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2252 break;
2253
2254 default:
2255 panic("bad req->type in rf_DispatchKernelIO");
2256 }
2257 db1_printf(("Exiting from DispatchKernelIO\n"));
2258
2259 return (0);
2260 }
/*
 * Callback associated with an I/O invoked from kernel code; installed
 * as bp->b_iodone by InitBP() via rf_DispatchKernelIO().
 *
 * Runs when the component I/O described by bp completes.  On error the
 * owning component is marked failed -- but only once, and only if the
 * array can still tolerate the failure.  The request is then placed on
 * the raidPtr->iodone queue and the raidio thread is signalled.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* InitBP() stashed the request pointer in b_private. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2330
2331
2332 /*
2333 * initialize a buf structure for doing an I/O in the kernel.
2334 */
2335 static void
2336 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2337 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2338 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2339 struct proc *b_proc)
2340 {
2341 /* bp->b_flags = B_PHYS | rw_flag; */
2342 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2343 bp->b_oflags = 0;
2344 bp->b_cflags = 0;
2345 bp->b_bcount = numSect << logBytesPerSector;
2346 bp->b_bufsize = bp->b_bcount;
2347 bp->b_error = 0;
2348 bp->b_dev = dev;
2349 bp->b_data = bf;
2350 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2351 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2352 if (bp->b_bcount == 0) {
2353 panic("bp->b_bcount is zero in InitBP!!");
2354 }
2355 bp->b_proc = b_proc;
2356 bp->b_iodone = cbFunc;
2357 bp->b_private = cbArg;
2358 }
2359
2360 static void
2361 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2362 struct disklabel *lp)
2363 {
2364 memset(lp, 0, sizeof(*lp));
2365
2366 /* fabricate a label... */
2367 if (raidPtr->totalSectors > UINT32_MAX)
2368 lp->d_secperunit = UINT32_MAX;
2369 else
2370 lp->d_secperunit = raidPtr->totalSectors;
2371 lp->d_secsize = raidPtr->bytesPerSector;
2372 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2373 lp->d_ntracks = 4 * raidPtr->numCol;
2374 lp->d_ncylinders = raidPtr->totalSectors /
2375 (lp->d_nsectors * lp->d_ntracks);
2376 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2377
2378 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2379 lp->d_type = DKTYPE_RAID;
2380 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2381 lp->d_rpm = 3600;
2382 lp->d_interleave = 1;
2383 lp->d_flags = 0;
2384
2385 lp->d_partitions[RAW_PART].p_offset = 0;
2386 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2387 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2388 lp->d_npartitions = RAW_PART + 1;
2389
2390 lp->d_magic = DISKMAGIC;
2391 lp->d_magic2 = DISKMAGIC;
2392 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2393
2394 }
2395 /*
2396 * Read the disklabel from the raid device. If one is not present, fake one
2397 * up.
2398 */
2399 static void
2400 raidgetdisklabel(dev_t dev)
2401 {
2402 int unit = raidunit(dev);
2403 struct raid_softc *rs;
2404 const char *errstring;
2405 struct disklabel *lp;
2406 struct cpu_disklabel *clp;
2407 RF_Raid_t *raidPtr;
2408
2409 if ((rs = raidget(unit, false)) == NULL)
2410 return;
2411
2412 lp = rs->sc_dkdev.dk_label;
2413 clp = rs->sc_dkdev.dk_cpulabel;
2414
2415 db1_printf(("Getting the disklabel...\n"));
2416
2417 memset(clp, 0, sizeof(*clp));
2418
2419 raidPtr = &rs->sc_r;
2420
2421 raidgetdefaultlabel(raidPtr, rs, lp);
2422
2423 /*
2424 * Call the generic disklabel extraction routine.
2425 */
2426 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2427 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2428 if (errstring)
2429 raidmakedisklabel(rs);
2430 else {
2431 int i;
2432 struct partition *pp;
2433
2434 /*
2435 * Sanity check whether the found disklabel is valid.
2436 *
2437 * This is necessary since total size of the raid device
2438 * may vary when an interleave is changed even though exactly
2439 * same components are used, and old disklabel may used
2440 * if that is found.
2441 */
2442 if (lp->d_secperunit < UINT32_MAX ?
2443 lp->d_secperunit != rs->sc_size :
2444 lp->d_secperunit > rs->sc_size)
2445 printf("raid%d: WARNING: %s: "
2446 "total sector size in disklabel (%ju) != "
2447 "the size of raid (%ju)\n", unit, rs->sc_xname,
2448 (uintmax_t)lp->d_secperunit,
2449 (uintmax_t)rs->sc_size);
2450 for (i = 0; i < lp->d_npartitions; i++) {
2451 pp = &lp->d_partitions[i];
2452 if (pp->p_offset + pp->p_size > rs->sc_size)
2453 printf("raid%d: WARNING: %s: end of partition `%c' "
2454 "exceeds the size of raid (%ju)\n",
2455 unit, rs->sc_xname, 'a' + i,
2456 (uintmax_t)rs->sc_size);
2457 }
2458 }
2459
2460 }
2461 /*
2462 * Take care of things one might want to take care of in the event
2463 * that a disklabel isn't present.
2464 */
2465 static void
2466 raidmakedisklabel(struct raid_softc *rs)
2467 {
2468 struct disklabel *lp = rs->sc_dkdev.dk_label;
2469 db1_printf(("Making a label..\n"));
2470
2471 /*
2472 * For historical reasons, if there's no disklabel present
2473 * the raw partition must be marked FS_BSDFFS.
2474 */
2475
2476 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2477
2478 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2479
2480 lp->d_checksum = dkcksum(lp);
2481 }
2482 /*
2483 * Wait interruptibly for an exclusive lock.
2484 *
2485 * XXX
2486 * Several drivers do this; it should be abstracted and made MP-safe.
2487 * (Hmm... where have we seen this warning before :-> GO )
2488 */
2489 static int
2490 raidlock(struct raid_softc *rs)
2491 {
2492 int error;
2493
2494 mutex_enter(&rs->sc_mutex);
2495 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2496 rs->sc_flags |= RAIDF_WANTED;
2497 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2498 if (error != 0)
2499 return (error);
2500 }
2501 rs->sc_flags |= RAIDF_LOCKED;
2502 mutex_exit(&rs->sc_mutex);
2503 return (0);
2504 }
2505 /*
2506 * Unlock and wake up any waiters.
2507 */
2508 static void
2509 raidunlock(struct raid_softc *rs)
2510 {
2511
2512 mutex_enter(&rs->sc_mutex);
2513 rs->sc_flags &= ~RAIDF_LOCKED;
2514 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2515 rs->sc_flags &= ~RAIDF_WANTED;
2516 cv_broadcast(&rs->sc_cv);
2517 }
2518 mutex_exit(&rs->sc_mutex);
2519 }
2520
2521
2522 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2523 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2524 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2525
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset of the component info area on every component. */
	return RF_COMPONENT_INFO_OFFSET;
}
2532
2533 static daddr_t
2534 rf_component_info_size(unsigned secsize)
2535 {
2536 daddr_t info_size;
2537
2538 KASSERT(secsize);
2539 if (secsize > RF_COMPONENT_INFO_SIZE)
2540 info_size = secsize;
2541 else
2542 info_size = RF_COMPONENT_INFO_SIZE;
2543
2544 return info_size;
2545 }
2546
2547 static daddr_t
2548 rf_parity_map_offset(RF_Raid_t *raidPtr)
2549 {
2550 daddr_t map_offset;
2551
2552 KASSERT(raidPtr->bytesPerSector);
2553 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2554 map_offset = raidPtr->bytesPerSector;
2555 else
2556 map_offset = RF_COMPONENT_INFO_SIZE;
2557 map_offset += rf_component_info_offset();
2558
2559 return map_offset;
2560 }
2561
2562 static daddr_t
2563 rf_parity_map_size(RF_Raid_t *raidPtr)
2564 {
2565 daddr_t map_size;
2566
2567 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2568 map_size = raidPtr->bytesPerSector;
2569 else
2570 map_size = RF_PARITY_MAP_SIZE;
2571
2572 return map_size;
2573 }
2574
2575 int
2576 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2577 {
2578 RF_ComponentLabel_t *clabel;
2579
2580 clabel = raidget_component_label(raidPtr, col);
2581 clabel->clean = RF_RAID_CLEAN;
2582 raidflush_component_label(raidPtr, col);
2583 return(0);
2584 }
2585
2586
2587 int
2588 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2589 {
2590 RF_ComponentLabel_t *clabel;
2591
2592 clabel = raidget_component_label(raidPtr, col);
2593 clabel->clean = RF_RAID_DIRTY;
2594 raidflush_component_label(raidPtr, col);
2595 return(0);
2596 }
2597
/*
 * Re-read column col's component label from disk into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns the error from
 * raidread_component_label() (0 on success).
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2607
/*
 * Return a pointer to the in-core component label for column col.
 * Callers modify it in place and push it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2613
2614 int
2615 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2616 {
2617 RF_ComponentLabel_t *label;
2618
2619 label = &raidPtr->raid_cinfo[col].ci_label;
2620 label->mod_counter = raidPtr->mod_counter;
2621 #ifndef RF_NO_PARITY_MAP
2622 label->parity_map_modcount = label->mod_counter;
2623 #endif
2624 return raidwrite_component_label(raidPtr->bytesPerSector,
2625 raidPtr->Disks[col].dev,
2626 raidPtr->raid_cinfo[col].ci_vp, label);
2627 }
2628
2629
/*
 * Read the component label from (dev, b_vp) into *clabel.  The on-disk
 * area may be larger than the structure (see rf_component_info_size());
 * only sizeof(RF_ComponentLabel_t) bytes are copied out.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2639
2640 /* ARGSUSED */
2641 static int
2642 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2643 size_t msize, daddr_t offset, daddr_t dsize)
2644 {
2645 struct buf *bp;
2646 const struct bdevsw *bdev;
2647 int error;
2648
2649 /* XXX should probably ensure that we don't try to do this if
2650 someone has changed rf_protected_sectors. */
2651
2652 if (b_vp == NULL) {
2653 /* For whatever reason, this component is not valid.
2654 Don't try to read a component label from it. */
2655 return(EINVAL);
2656 }
2657
2658 /* get a block of the appropriate size... */
2659 bp = geteblk((int)dsize);
2660 bp->b_dev = dev;
2661
2662 /* get our ducks in a row for the read */
2663 bp->b_blkno = offset / DEV_BSIZE;
2664 bp->b_bcount = dsize;
2665 bp->b_flags |= B_READ;
2666 bp->b_resid = dsize;
2667
2668 bdev = bdevsw_lookup(bp->b_dev);
2669 if (bdev == NULL)
2670 return (ENXIO);
2671 (*bdev->d_strategy)(bp);
2672
2673 error = biowait(bp);
2674
2675 if (!error) {
2676 memcpy(data, bp->b_data, msize);
2677 }
2678
2679 brelse(bp, 0);
2680 return(error);
2681 }
2682
2683
/*
 * Write *clabel to the component label area on (dev, b_vp).
 * Synchronous (asyncp = 0); the area beyond the structure is
 * zero-filled by raidwrite_component_area().
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2693
2694 /* ARGSUSED */
2695 static int
2696 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2697 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2698 {
2699 struct buf *bp;
2700 const struct bdevsw *bdev;
2701 int error;
2702
2703 /* get a block of the appropriate size... */
2704 bp = geteblk((int)dsize);
2705 bp->b_dev = dev;
2706
2707 /* get our ducks in a row for the write */
2708 bp->b_blkno = offset / DEV_BSIZE;
2709 bp->b_bcount = dsize;
2710 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2711 bp->b_resid = dsize;
2712
2713 memset(bp->b_data, 0, dsize);
2714 memcpy(bp->b_data, data, msize);
2715
2716 bdev = bdevsw_lookup(bp->b_dev);
2717 if (bdev == NULL)
2718 return (ENXIO);
2719 (*bdev->d_strategy)(bp);
2720 if (asyncp)
2721 return 0;
2722 error = biowait(bp);
2723 brelse(bp, 0);
2724 if (error) {
2725 #if 1
2726 printf("Failed to write RAID component info!\n");
2727 #endif
2728 }
2729
2730 return(error);
2731 }
2732
2733 void
2734 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2735 {
2736 int c;
2737
2738 for (c = 0; c < raidPtr->numCol; c++) {
2739 /* Skip dead disks. */
2740 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2741 continue;
2742 /* XXXjld: what if an error occurs here? */
2743 raidwrite_component_area(raidPtr->Disks[c].dev,
2744 raidPtr->raid_cinfo[c].ci_vp, map,
2745 RF_PARITYMAP_NBYTE,
2746 rf_parity_map_offset(raidPtr),
2747 rf_parity_map_size(raidPtr), 0);
2748 }
2749 }
2750
2751 void
2752 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2753 {
2754 struct rf_paritymap_ondisk tmp;
2755 int c,first;
2756
2757 first=1;
2758 for (c = 0; c < raidPtr->numCol; c++) {
2759 /* Skip dead disks. */
2760 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2761 continue;
2762 raidread_component_area(raidPtr->Disks[c].dev,
2763 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2764 RF_PARITYMAP_NBYTE,
2765 rf_parity_map_offset(raidPtr),
2766 rf_parity_map_size(raidPtr));
2767 if (first) {
2768 memcpy(map, &tmp, sizeof(*map));
2769 first = 0;
2770 } else {
2771 rf_paritymap_merge(map, &tmp);
2772 }
2773 }
2774 }
2775
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component (and every in-use spare) dirty.  The clean
 * bits are set again at shutdown/parity-rewrite time, so a label still
 * dirty at boot indicates an unclean shutdown.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			/* NOTE(review): if no column references this spare,
			   scol is still -1 (or stale from a previous
			   iteration) here -- verify that cannot happen. */
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2835
2836
/*
 * Refresh the component labels of all optimal components and all in-use
 * spares: bump the modification counter, note the unit number, and write
 * the labels out.  When final is RF_FINAL_COMPONENT_UPDATE and parity is
 * known good, the clean bit is also set (via raidmarkclean()).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			/* NOTE(review): if no column references this spare,
			   scol keeps its previous value (initially -1). */
			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2911
2912 void
2913 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2914 {
2915
2916 if (vp != NULL) {
2917 if (auto_configured == 1) {
2918 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2919 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2920 vput(vp);
2921
2922 } else {
2923 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2924 }
2925 }
2926 }
2927
2928
2929 void
2930 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2931 {
2932 int r,c;
2933 struct vnode *vp;
2934 int acd;
2935
2936
2937 /* We take this opportunity to close the vnodes like we should.. */
2938
2939 for (c = 0; c < raidPtr->numCol; c++) {
2940 vp = raidPtr->raid_cinfo[c].ci_vp;
2941 acd = raidPtr->Disks[c].auto_configured;
2942 rf_close_component(raidPtr, vp, acd);
2943 raidPtr->raid_cinfo[c].ci_vp = NULL;
2944 raidPtr->Disks[c].auto_configured = 0;
2945 }
2946
2947 for (r = 0; r < raidPtr->numSpare; r++) {
2948 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2949 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2950 rf_close_component(raidPtr, vp, acd);
2951 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2952 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2953 }
2954 }
2955
2956
2957 void
2958 rf_ReconThread(struct rf_recon_req *req)
2959 {
2960 int s;
2961 RF_Raid_t *raidPtr;
2962
2963 s = splbio();
2964 raidPtr = (RF_Raid_t *) req->raidPtr;
2965 raidPtr->recon_in_progress = 1;
2966
2967 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2968 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2969
2970 RF_Free(req, sizeof(*req));
2971
2972 raidPtr->recon_in_progress = 0;
2973 splx(s);
2974
2975 /* That's all... */
2976 kthread_exit(0); /* does not return */
2977 }
2978
2979 void
2980 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2981 {
2982 int retcode;
2983 int s;
2984
2985 raidPtr->parity_rewrite_stripes_done = 0;
2986 raidPtr->parity_rewrite_in_progress = 1;
2987 s = splbio();
2988 retcode = rf_RewriteParity(raidPtr);
2989 splx(s);
2990 if (retcode) {
2991 printf("raid%d: Error re-writing parity (%d)!\n",
2992 raidPtr->raidid, retcode);
2993 } else {
2994 /* set the clean bit! If we shutdown correctly,
2995 the clean bit on each component label will get
2996 set */
2997 raidPtr->parity_good = RF_RAID_CLEAN;
2998 }
2999 raidPtr->parity_rewrite_in_progress = 0;
3000
3001 /* Anyone waiting for us to stop? If so, inform them... */
3002 if (raidPtr->waitShutdown) {
3003 wakeup(&raidPtr->parity_rewrite_in_progress);
3004 }
3005
3006 /* That's all... */
3007 kthread_exit(0); /* does not return */
3008 }
3009
3010
3011 void
3012 rf_CopybackThread(RF_Raid_t *raidPtr)
3013 {
3014 int s;
3015
3016 raidPtr->copyback_in_progress = 1;
3017 s = splbio();
3018 rf_CopybackReconstructedData(raidPtr);
3019 splx(s);
3020 raidPtr->copyback_in_progress = 0;
3021
3022 /* That's all... */
3023 kthread_exit(0); /* does not return */
3024 }
3025
3026
3027 void
3028 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3029 {
3030 int s;
3031 RF_Raid_t *raidPtr;
3032
3033 s = splbio();
3034 raidPtr = req->raidPtr;
3035 raidPtr->recon_in_progress = 1;
3036 rf_ReconstructInPlace(raidPtr, req->col);
3037 RF_Free(req, sizeof(*req));
3038 raidPtr->recon_in_progress = 0;
3039 splx(s);
3040
3041 /* That's all... */
3042 kthread_exit(0); /* does not return */
3043 }
3044
/*
 * Read and validate the component label on (dev, vp).  If the label
 * looks reasonable, prepend a new RF_AutoConfig_t for this component
 * to ac_list and return the new list head; the open vnode is then
 * owned by the list entry.  Otherwise the vnode is closed and released
 * and ac_list is returned unchanged.  On allocation failure the entire
 * list built so far is freed and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
 oomem:
		/* out of memory: tear down everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			/* ownership of clabel moves to the list entry */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3102
/*
 * Scan every disk-class device in the system for RAIDframe components.
 *
 * For each candidate disk we open the raw partition, then look for
 * components in (a) the wedge itself for dk(4) devices, (b) every
 * disklabel partition marked FS_RAID, and (c) the raw partition if
 * nothing else was found.  Each component found is prepended to the
 * returned RF_AutoConfig_t list by rf_get_component(), which keeps
 * the component vnode open on success.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			/* NOTE(review): vp was successfully opened above but
			   is released here without a matching VOP_CLOSE --
			   verify whether this leaks an open reference. */
			vput(vp);
			continue;
		}
		if (wedge) {
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* wedges must be explicitly typed for RAIDframe */
			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3284
3285
3286 int
3287 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3288 {
3289
3290 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3291 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3292 ((clabel->clean == RF_RAID_CLEAN) ||
3293 (clabel->clean == RF_RAID_DIRTY)) &&
3294 clabel->row >=0 &&
3295 clabel->column >= 0 &&
3296 clabel->num_rows > 0 &&
3297 clabel->num_columns > 0 &&
3298 clabel->row < clabel->num_rows &&
3299 clabel->column < clabel->num_columns &&
3300 clabel->blockSize > 0 &&
3301 /*
3302 * numBlocksHi may contain garbage, but it is ok since
3303 * the type is unsigned. If it is really garbage,
3304 * rf_fix_old_label_size() will fix it.
3305 */
3306 rf_component_label_numblocks(clabel) > 0) {
3307 /*
3308 * label looks reasonable enough...
3309 * let's make sure it has no old garbage.
3310 */
3311 if (numsecs)
3312 rf_fix_old_label_size(clabel, numsecs);
3313 return(1);
3314 }
3315 return(0);
3316 }
3317
3318
3319 /*
3320 * For reasons yet unknown, some old component labels have garbage in
3321 * the newer numBlocksHi region, and this causes lossage. Since those
3322 * disks will also have numsecs set to less than 32 bits of sectors,
3323 * we can determine when this corruption has occurred, and fix it.
3324 *
3325 * The exact same problem, with the same unknown reason, happens to
3326 * the partitionSizeHi member as well.
3327 */
3328 static void
3329 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3330 {
3331
3332 if (numsecs < ((uint64_t)1 << 32)) {
3333 if (clabel->numBlocksHi) {
3334 printf("WARNING: total sectors < 32 bits, yet "
3335 "numBlocksHi set\n"
3336 "WARNING: resetting numBlocksHi to zero.\n");
3337 clabel->numBlocksHi = 0;
3338 }
3339
3340 if (clabel->partitionSizeHi) {
3341 printf("WARNING: total sectors < 32 bits, yet "
3342 "partitionSizeHi set\n"
3343 "WARNING: resetting partitionSizeHi to zero.\n");
3344 clabel->partitionSizeHi = 0;
3345 }
3346 }
3347 }
3348
3349
#ifdef DEBUG
/*
 * Dump a component label to the console in human-readable form.
 * Debug-only helper.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Indexed by root_partition, masked to 2 bits below. */
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3383
3384 RF_ConfigSet_t *
3385 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3386 {
3387 RF_AutoConfig_t *ac;
3388 RF_ConfigSet_t *config_sets;
3389 RF_ConfigSet_t *cset;
3390 RF_AutoConfig_t *ac_next;
3391
3392
3393 config_sets = NULL;
3394
3395 /* Go through the AutoConfig list, and figure out which components
3396 belong to what sets. */
3397 ac = ac_list;
3398 while(ac!=NULL) {
3399 /* we're going to putz with ac->next, so save it here
3400 for use at the end of the loop */
3401 ac_next = ac->next;
3402
3403 if (config_sets == NULL) {
3404 /* will need at least this one... */
3405 config_sets = (RF_ConfigSet_t *)
3406 malloc(sizeof(RF_ConfigSet_t),
3407 M_RAIDFRAME, M_NOWAIT);
3408 if (config_sets == NULL) {
3409 panic("rf_create_auto_sets: No memory!");
3410 }
3411 /* this one is easy :) */
3412 config_sets->ac = ac;
3413 config_sets->next = NULL;
3414 config_sets->rootable = 0;
3415 ac->next = NULL;
3416 } else {
3417 /* which set does this component fit into? */
3418 cset = config_sets;
3419 while(cset!=NULL) {
3420 if (rf_does_it_fit(cset, ac)) {
3421 /* looks like it matches... */
3422 ac->next = cset->ac;
3423 cset->ac = ac;
3424 break;
3425 }
3426 cset = cset->next;
3427 }
3428 if (cset==NULL) {
3429 /* didn't find a match above... new set..*/
3430 cset = (RF_ConfigSet_t *)
3431 malloc(sizeof(RF_ConfigSet_t),
3432 M_RAIDFRAME, M_NOWAIT);
3433 if (cset == NULL) {
3434 panic("rf_create_auto_sets: No memory!");
3435 }
3436 cset->ac = ac;
3437 ac->next = NULL;
3438 cset->next = config_sets;
3439 cset->rootable = 0;
3440 config_sets = cset;
3441 }
3442 }
3443 ac = ac_next;
3444 }
3445
3446
3447 return(config_sets);
3448 }
3449
3450 static int
3451 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3452 {
3453 RF_ComponentLabel_t *clabel1, *clabel2;
3454
3455 /* If this one matches the *first* one in the set, that's good
3456 enough, since the other members of the set would have been
3457 through here too... */
3458 /* note that we are not checking partitionSize here..
3459
3460 Note that we are also not checking the mod_counters here.
3461 If everything else matches except the mod_counter, that's
3462 good enough for this test. We will deal with the mod_counters
3463 a little later in the autoconfiguration process.
3464
3465 (clabel1->mod_counter == clabel2->mod_counter) &&
3466
3467 The reason we don't check for this is that failed disks
3468 will have lower modification counts. If those disks are
3469 not added to the set they used to belong to, then they will
3470 form their own set, which may result in 2 different sets,
3471 for example, competing to be configured at raid0, and
3472 perhaps competing to be the root filesystem set. If the
3473 wrong ones get configured, or both attempt to become /,
3474 weird behaviour and or serious lossage will occur. Thus we
3475 need to bring them into the fold here, and kick them out at
3476 a later point.
3477
3478 */
3479
3480 clabel1 = cset->ac->clabel;
3481 clabel2 = ac->clabel;
3482 if ((clabel1->version == clabel2->version) &&
3483 (clabel1->serial_number == clabel2->serial_number) &&
3484 (clabel1->num_rows == clabel2->num_rows) &&
3485 (clabel1->num_columns == clabel2->num_columns) &&
3486 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3487 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3488 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3489 (clabel1->parityConfig == clabel2->parityConfig) &&
3490 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3491 (clabel1->blockSize == clabel2->blockSize) &&
3492 rf_component_label_numblocks(clabel1) ==
3493 rf_component_label_numblocks(clabel2) &&
3494 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3495 (clabel1->root_partition == clabel2->root_partition) &&
3496 (clabel1->last_unit == clabel2->last_unit) &&
3497 (clabel1->config_order == clabel2->config_order)) {
3498 /* if it get's here, it almost *has* to be a match */
3499 } else {
3500 /* it's not consistent with somebody in the set..
3501 punt */
3502 return(0);
3503 }
3504 /* all was fine.. it must fit... */
3505 return(1);
3506 }
3507
/*
 * Decide whether configuration set 'cset' has enough live components
 * to be configured.  Returns 1 if the set is usable, 0 otherwise.
 * Components whose mod_counter is lower than the newest counter in the
 * set are treated as missing.  RAID 1 is special-cased: only the loss
 * of *both* members of a mirror pair (even+odd column) is fatal.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   It is the maximum counter found on any member. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For every column, look for a component with the current
	   mod_counter.  Anything older is considered failed. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) component of a
			   RAID 1 pair without bailing.. reset the
			   even_pair_failed flag, and go on to the next
			   component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate exactly one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3610
3611 void
3612 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3613 RF_Raid_t *raidPtr)
3614 {
3615 RF_ComponentLabel_t *clabel;
3616 int i;
3617
3618 clabel = ac->clabel;
3619
3620 /* 1. Fill in the common stuff */
3621 config->numRow = clabel->num_rows = 1;
3622 config->numCol = clabel->num_columns;
3623 config->numSpare = 0; /* XXX should this be set here? */
3624 config->sectPerSU = clabel->sectPerSU;
3625 config->SUsPerPU = clabel->SUsPerPU;
3626 config->SUsPerRU = clabel->SUsPerRU;
3627 config->parityConfig = clabel->parityConfig;
3628 /* XXX... */
3629 strcpy(config->diskQueueType,"fifo");
3630 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3631 config->layoutSpecificSize = 0; /* XXX ?? */
3632
3633 while(ac!=NULL) {
3634 /* row/col values will be in range due to the checks
3635 in reasonable_label() */
3636 strcpy(config->devnames[0][ac->clabel->column],
3637 ac->devname);
3638 ac = ac->next;
3639 }
3640
3641 for(i=0;i<RF_MAXDBGV;i++) {
3642 config->debugVars[i][0] = 0;
3643 }
3644 }
3645
3646 int
3647 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3648 {
3649 RF_ComponentLabel_t *clabel;
3650 int column;
3651 int sparecol;
3652
3653 raidPtr->autoconfigure = new_value;
3654
3655 for(column=0; column<raidPtr->numCol; column++) {
3656 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3657 clabel = raidget_component_label(raidPtr, column);
3658 clabel->autoconfigure = new_value;
3659 raidflush_component_label(raidPtr, column);
3660 }
3661 }
3662 for(column = 0; column < raidPtr->numSpare ; column++) {
3663 sparecol = raidPtr->numCol + column;
3664 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3665 clabel = raidget_component_label(raidPtr, sparecol);
3666 clabel->autoconfigure = new_value;
3667 raidflush_component_label(raidPtr, sparecol);
3668 }
3669 }
3670 return(new_value);
3671 }
3672
3673 int
3674 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3675 {
3676 RF_ComponentLabel_t *clabel;
3677 int column;
3678 int sparecol;
3679
3680 raidPtr->root_partition = new_value;
3681 for(column=0; column<raidPtr->numCol; column++) {
3682 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3683 clabel = raidget_component_label(raidPtr, column);
3684 clabel->root_partition = new_value;
3685 raidflush_component_label(raidPtr, column);
3686 }
3687 }
3688 for(column = 0; column < raidPtr->numSpare ; column++) {
3689 sparecol = raidPtr->numCol + column;
3690 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3691 clabel = raidget_component_label(raidPtr, sparecol);
3692 clabel->root_partition = new_value;
3693 raidflush_component_label(raidPtr, sparecol);
3694 }
3695 }
3696 return(new_value);
3697 }
3698
3699 void
3700 rf_release_all_vps(RF_ConfigSet_t *cset)
3701 {
3702 RF_AutoConfig_t *ac;
3703
3704 ac = cset->ac;
3705 while(ac!=NULL) {
3706 /* Close the vp, and give it back */
3707 if (ac->vp) {
3708 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3709 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3710 vput(ac->vp);
3711 ac->vp = NULL;
3712 }
3713 ac = ac->next;
3714 }
3715 }
3716
3717
3718 void
3719 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3720 {
3721 RF_AutoConfig_t *ac;
3722 RF_AutoConfig_t *next_ac;
3723
3724 ac = cset->ac;
3725 while(ac!=NULL) {
3726 next_ac = ac->next;
3727 /* nuke the label */
3728 free(ac->clabel, M_RAIDFRAME);
3729 /* cleanup the config structure */
3730 free(ac, M_RAIDFRAME);
3731 /* "next.." */
3732 ac = next_ac;
3733 }
3734 /* and, finally, nuke the config set */
3735 free(cset, M_RAIDFRAME);
3736 }
3737
3738
/*
 * Initialize a component label from the current state of the RAID
 * set.  Fills in every field that is written to disk; the caller
 * supplies the label storage.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Rows are always 1 in this implementation. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3771
/*
 * Configure (bring up) one auto-detected RAID set.  Returns the
 * configured softc on success, or NULL if memory ran out or
 * rf_Configure() failed.  On success the set's rootable status is
 * also recorded in both the set and the raid structure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* Walk upward from last_unit until we hit a unit that is not
	   already a valid (configured) RAID set. */
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* The probe above may have run past the end; allocate now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; give the softc back. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3855
3856 void
3857 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3858 {
3859 struct buf *bp;
3860 struct raid_softc *rs;
3861
3862 bp = (struct buf *)desc->bp;
3863 rs = desc->raidPtr->softc;
3864 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3865 (bp->b_flags & B_READ));
3866 }
3867
/*
 * Initialize a pool at IPL_BIO: cap it at xmax items, pre-allocate
 * xmin items, and keep xmin resident via the low-water mark.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3877
3878 /*
3879 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3880 * if there is IO pending and if that IO could possibly be done for a
3881 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3882 * otherwise.
3883 *
3884 */
3885
3886 int
3887 rf_buf_queue_check(RF_Raid_t *raidPtr)
3888 {
3889 struct raid_softc *rs = raidPtr->softc;
3890 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3891 /* there is work to do */
3892 return 0;
3893 }
3894 /* default is nothing to do */
3895 return 1;
3896 }
3897
3898 int
3899 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3900 {
3901 uint64_t numsecs;
3902 unsigned secsize;
3903 int error;
3904
3905 error = getdisksize(vp, &numsecs, &secsize);
3906 if (error == 0) {
3907 diskPtr->blockSize = secsize;
3908 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3909 diskPtr->partitionSize = numsecs;
3910 return 0;
3911 }
3912 return error;
3913 }
3914
/*
 * Autoconfiguration match hook: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3920
/*
 * Autoconfiguration attach hook.  Deliberately empty: per-set setup
 * happens when a set is configured (see rf_auto_config_set()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3926
3927
3928 static int
3929 raid_detach(device_t self, int flags)
3930 {
3931 int error;
3932 struct raid_softc *rs = raidget(device_unit(self), false);
3933
3934 if (rs == NULL)
3935 return ENXIO;
3936
3937 if ((error = raidlock(rs)) != 0)
3938 return (error);
3939
3940 error = raid_detach_unlocked(rs);
3941
3942 if (error != 0)
3943 raidunlock(rs);
3944
3945 return error;
3946 }
3947
/*
 * Publish a synthetic disk geometry for the RAID pseudo-disk, derived
 * from the set's total size, sector size and stripe layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* NOTE(review): 4 tracks per column appears arbitrary -- the
	   geometry is fictitious anyway; confirm against disk(9) use. */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3962
3963 /*
3964 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3965 * We end up returning whatever error was returned by the first cache flush
3966 * that fails.
3967 */
3968
3969 int
3970 rf_sync_component_caches(RF_Raid_t *raidPtr)
3971 {
3972 int c, sparecol;
3973 int e,error;
3974 int force = 1;
3975
3976 error = 0;
3977 for (c = 0; c < raidPtr->numCol; c++) {
3978 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3979 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3980 &force, FWRITE, NOCRED);
3981 if (e) {
3982 if (e != ENODEV)
3983 printf("raid%d: cache flush to component %s failed.\n",
3984 raidPtr->raidid, raidPtr->Disks[c].devname);
3985 if (error == 0) {
3986 error = e;
3987 }
3988 }
3989 }
3990 }
3991
3992 for( c = 0; c < raidPtr->numSpare ; c++) {
3993 sparecol = raidPtr->numCol + c;
3994 /* Need to ensure that the reconstruct actually completed! */
3995 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3996 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3997 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3998 if (e) {
3999 if (e != ENODEV)
4000 printf("raid%d: cache flush to component %s failed.\n",
4001 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
4002 if (error == 0) {
4003 error = e;
4004 }
4005 }
4006 }
4007 }
4008 return error;
4009 }
4010
4011 /*
4012 * Module interface
4013 */
4014
4015 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
4016
4017 #ifdef _MODULE
4018 CFDRIVER_DECL(raid, DV_DISK, NULL);
4019 #endif
4020
4021 static int raid_modcmd(modcmd_t, void *);
4022 static int raid_modcmd_init(void);
4023 static int raid_modcmd_fini(void);
4024
4025 static int
4026 raid_modcmd(modcmd_t cmd, void *data)
4027 {
4028 int error;
4029
4030 error = 0;
4031 switch (cmd) {
4032 case MODULE_CMD_INIT:
4033 error = raid_modcmd_init();
4034 break;
4035 case MODULE_CMD_FINI:
4036 error = raid_modcmd_fini();
4037 break;
4038 default:
4039 error = ENOTTY;
4040 break;
4041 }
4042 return error;
4043 }
4044
/*
 * Module initialization: attach the devsw, cfdriver (module build
 * only) and cfattach, boot the RAIDframe core, and register a
 * finalizer that autoconfigures RAID sets once all real hardware has
 * been found.  Each failure path rolls back the steps taken so far.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to pick the majors dynamically.  EEXIST
	   is tolerated (the devsw may already be built into the kernel). */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw attach above */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back both earlier attach steps */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is always 0 at this point -- every failure
	   above returned early -- so this check is vestigial. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
	}

	return error;
}
4114
/*
 * Module teardown.  Refuses to unload while any raid softc exists;
 * otherwise detaches cfattach, cfdriver (module build only) and
 * devsw in order, re-attaching earlier stages if a later detach step
 * fails, and finally shuts down the RAIDframe core and destroys the
 * module lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		/* undo the cfattach detach above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		/* undo both earlier detach steps */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4161