rf_netbsdkintf.c revision 1.329 1 /* $NetBSD: rf_netbsdkintf.c,v 1.329 2015/12/26 12:59:00 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.329 2015/12/26 12:59:00 pgoyette Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #include "ioconf.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 struct raid_softc;
183 static void raidinit(struct raid_softc *);
184
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 static dev_type_open(raidopen);
201 static dev_type_close(raidclose);
202 static dev_type_read(raidread);
203 static dev_type_write(raidwrite);
204 static dev_type_ioctl(raidioctl);
205 static dev_type_strategy(raidstrategy);
206 static dev_type_dump(raiddump);
207 static dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 .d_open = raidopen,
211 .d_close = raidclose,
212 .d_strategy = raidstrategy,
213 .d_ioctl = raidioctl,
214 .d_dump = raiddump,
215 .d_psize = raidsize,
216 .d_discard = nodiscard,
217 .d_flag = D_DISK
218 };
219
220 const struct cdevsw raid_cdevsw = {
221 .d_open = raidopen,
222 .d_close = raidclose,
223 .d_read = raidread,
224 .d_write = raidwrite,
225 .d_ioctl = raidioctl,
226 .d_stop = nostop,
227 .d_tty = notty,
228 .d_poll = nopoll,
229 .d_mmap = nommap,
230 .d_kqfilter = nokqfilter,
231 .d_discard = nodiscard,
232 .d_flag = D_DISK
233 };
234
235 static struct dkdriver rf_dkdriver = {
236 .d_strategy = raidstrategy,
237 .d_minphys = minphys
238 };
239
/*
 * Per-unit software state for a RAIDframe device.  One is allocated per
 * raid unit (see raidcreate()) and linked onto the global `raids' list,
 * which is protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf handle; may be NULL before attach
				   (raidclose() checks for this) */
	int     sc_unit;	/* raid unit number */
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global `raids' list */
};
254 /* sc_flags */
255 #define RAIDF_INITED 0x01 /* unit has been initialized */
256 #define RAIDF_WLABEL 0x02 /* label area is writable */
257 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
258 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
259 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
260 #define RAIDF_LOCKED 0x80 /* unit is locked */
261
262 #define raidunit(x) DISKUNIT(x)
263
264 extern struct cfdriver raid_cd;
265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
266 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
267 DVF_DETACH_SHUTDOWN);
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static int raid_detach_unlocked(struct raid_softc *);
304
305 static void rf_markalldirty(RF_Raid_t *);
306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(device_t);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
327
328 /*
329 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
330 * Note that this is overridden by having RAID_AUTOCONFIG as an option
331 * in the kernel config file.
332 */
333 #ifdef RAID_AUTOCONFIG
334 int raidautoconfig = 1;
335 #else
336 int raidautoconfig = 0;
337 #endif
338 static bool raidautoconfigdone = false;
339
340 struct RF_Pools_s rf_pools;
341
342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
343 static kmutex_t raid_lock;
344
345 static struct raid_softc *
346 raidcreate(int unit) {
347 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
348 if (sc == NULL) {
349 #ifdef DIAGNOSTIC
350 printf("%s: out of memory\n", __func__);
351 #endif
352 return NULL;
353 }
354 sc->sc_unit = unit;
355 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
356 cv_init(&sc->sc_cv, "raidunit");
357 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
358 return sc;
359 }
360
361 static void
362 raiddestroy(struct raid_softc *sc) {
363 cv_destroy(&sc->sc_cv);
364 mutex_destroy(&sc->sc_mutex);
365 bufq_free(sc->buf_queue);
366 kmem_free(sc, sizeof(*sc));
367 }
368
369 static struct raid_softc *
370 raidget(int unit, bool create) {
371 struct raid_softc *sc;
372 if (unit < 0) {
373 #ifdef DIAGNOSTIC
374 panic("%s: unit %d!", __func__, unit);
375 #endif
376 return NULL;
377 }
378 mutex_enter(&raid_lock);
379 LIST_FOREACH(sc, &raids, sc_link) {
380 if (sc->sc_unit == unit) {
381 mutex_exit(&raid_lock);
382 return sc;
383 }
384 }
385 mutex_exit(&raid_lock);
386 if (!create)
387 return NULL;
388 if ((sc = raidcreate(unit)) == NULL)
389 return NULL;
390 mutex_enter(&raid_lock);
391 LIST_INSERT_HEAD(&raids, sc, sc_link);
392 mutex_exit(&raid_lock);
393 return sc;
394 }
395
/*
 * Unlink a raid softc from the global list (under raid_lock) and free
 * it.  The caller must guarantee no other references to `sc' remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
403
/*
 * Legacy pseudo-device attach entry point.  Intentionally a no-op:
 * all real work moved to module initialization (see comment below).
 * `num' (the requested number of units) is ignored.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
413
414 int
415 rf_autoconfig(device_t self)
416 {
417 RF_AutoConfig_t *ac_list;
418 RF_ConfigSet_t *config_sets;
419
420 if (!raidautoconfig || raidautoconfigdone == true)
421 return (0);
422
423 /* XXX This code can only be run once. */
424 raidautoconfigdone = true;
425
426 #ifdef __HAVE_CPU_BOOTCONF
427 /*
428 * 0. find the boot device if needed first so we can use it later
429 * this needs to be done before we autoconfigure any raid sets,
430 * because if we use wedges we are not going to be able to open
431 * the boot device later
432 */
433 if (booted_device == NULL)
434 cpu_bootconf();
435 #endif
436 /* 1. locate all RAID components on the system */
437 aprint_debug("Searching for RAID components...\n");
438 ac_list = rf_find_raid_components();
439
440 /* 2. Sort them into their respective sets. */
441 config_sets = rf_create_auto_sets(ac_list);
442
443 /*
444 * 3. Evaluate each set and configure the valid ones.
445 * This gets done in rf_buildroothack().
446 */
447 rf_buildroothack(config_sets);
448
449 return 1;
450 }
451
452 static int
453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
454 const char *bootname = device_xname(bdv);
455 size_t len = strlen(bootname);
456
457 for (int col = 0; col < r->numCol; col++) {
458 const char *devname = r->Disks[col].devname;
459 devname += sizeof("/dev/") - 1;
460 if (strncmp(devname, "dk", 2) == 0) {
461 const char *parent =
462 dkwedge_get_parent_name(r->Disks[col].dev);
463 if (parent != NULL)
464 devname = parent;
465 }
466 if (strncmp(devname, bootname, len) == 0) {
467 struct raid_softc *sc = r->softc;
468 aprint_debug("raid%d includes boot device %s\n",
469 sc->sc_unit, devname);
470 return 1;
471 }
472 }
473 return 0;
474 }
475
/*
 * Walk the list of discovered config sets, auto-configuring each set
 * that has enough components and is flagged for autoconfiguration.
 * Afterwards, unless the user hardwired a root device (rootspec), try
 * to point booted_device at a configured raid unit: directly when
 * exactly one bootable set was found, via rf_containsboot() matching
 * when several were, and by setting RB_ASKNAME when we cannot decide.
 * Consumes `config_sets' (each set is released/cleaned up).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of bootable, configured sets */
	struct raid_softc *sc, *rsc;	/* rsc: last bootable candidate */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* Save the link now; cset is cleaned up below. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* Unit carries wedges: root is the wedge named
			   after the 'a' partition, not the raw unit. */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Narrow the candidates to sets that actually contain
		   the device we booted from. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
586
587 static int
588 raidsize(dev_t dev)
589 {
590 struct raid_softc *rs;
591 struct disklabel *lp;
592 int part, unit, omask, size;
593
594 unit = raidunit(dev);
595 if ((rs = raidget(unit, false)) == NULL)
596 return -1;
597 if ((rs->sc_flags & RAIDF_INITED) == 0)
598 return (-1);
599
600 part = DISKPART(dev);
601 omask = rs->sc_dkdev.dk_openmask & (1 << part);
602 lp = rs->sc_dkdev.dk_label;
603
604 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
605 return (-1);
606
607 if (lp->d_partitions[part].p_fstype != FS_SWAP)
608 size = -1;
609 else
610 size = lp->d_partitions[part].p_size *
611 (lp->d_secsize / DEV_BSIZE);
612
613 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
614 return (-1);
615
616 return (size);
617
618 }
619
620 static int
621 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
622 {
623 int unit = raidunit(dev);
624 struct raid_softc *rs;
625 const struct bdevsw *bdev;
626 struct disklabel *lp;
627 RF_Raid_t *raidPtr;
628 daddr_t offset;
629 int part, c, sparecol, j, scol, dumpto;
630 int error = 0;
631
632 if ((rs = raidget(unit, false)) == NULL)
633 return ENXIO;
634
635 raidPtr = &rs->sc_r;
636
637 if ((rs->sc_flags & RAIDF_INITED) == 0)
638 return ENXIO;
639
640 /* we only support dumping to RAID 1 sets */
641 if (raidPtr->Layout.numDataCol != 1 ||
642 raidPtr->Layout.numParityCol != 1)
643 return EINVAL;
644
645 if ((error = raidlock(rs)) != 0)
646 return error;
647
648 if (size % DEV_BSIZE != 0) {
649 error = EINVAL;
650 goto out;
651 }
652
653 if (blkno + size / DEV_BSIZE > rs->sc_size) {
654 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
655 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
656 size / DEV_BSIZE, rs->sc_size);
657 error = EINVAL;
658 goto out;
659 }
660
661 part = DISKPART(dev);
662 lp = rs->sc_dkdev.dk_label;
663 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
664
665 /* figure out what device is alive.. */
666
667 /*
668 Look for a component to dump to. The preference for the
669 component to dump to is as follows:
670 1) the master
671 2) a used_spare of the master
672 3) the slave
673 4) a used_spare of the slave
674 */
675
676 dumpto = -1;
677 for (c = 0; c < raidPtr->numCol; c++) {
678 if (raidPtr->Disks[c].status == rf_ds_optimal) {
679 /* this might be the one */
680 dumpto = c;
681 break;
682 }
683 }
684
685 /*
686 At this point we have possibly selected a live master or a
687 live slave. We now check to see if there is a spared
688 master (or a spared slave), if we didn't find a live master
689 or a live slave.
690 */
691
692 for (c = 0; c < raidPtr->numSpare; c++) {
693 sparecol = raidPtr->numCol + c;
694 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
695 /* How about this one? */
696 scol = -1;
697 for(j=0;j<raidPtr->numCol;j++) {
698 if (raidPtr->Disks[j].spareCol == sparecol) {
699 scol = j;
700 break;
701 }
702 }
703 if (scol == 0) {
704 /*
705 We must have found a spared master!
706 We'll take that over anything else
707 found so far. (We couldn't have
708 found a real master before, since
709 this is a used spare, and it's
710 saying that it's replacing the
711 master.) On reboot (with
712 autoconfiguration turned on)
713 sparecol will become the 1st
714 component (component0) of this set.
715 */
716 dumpto = sparecol;
717 break;
718 } else if (scol != -1) {
719 /*
720 Must be a spared slave. We'll dump
721 to that if we havn't found anything
722 else so far.
723 */
724 if (dumpto == -1)
725 dumpto = sparecol;
726 }
727 }
728 }
729
730 if (dumpto == -1) {
731 /* we couldn't find any live components to dump to!?!?
732 */
733 error = EINVAL;
734 goto out;
735 }
736
737 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
738
739 /*
740 Note that blkno is relative to this particular partition.
741 By adding the offset of this partition in the RAID
742 set, and also adding RF_PROTECTED_SECTORS, we get a
743 value that is relative to the partition used for the
744 underlying component.
745 */
746
747 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
748 blkno + offset, va, size);
749
750 out:
751 raidunlock(rs);
752
753 return error;
754 }
755
/* ARGSUSED */
/*
 * Open a partition of a raid unit.  Creates the softc on first open
 * (raidget(..., true)).  Fails with EBUSY if the unit is shutting
 * down, or if wedges exist and a non-raw partition is requested.
 * Reads the disklabel on the first open of an initialized unit, and
 * marks all components dirty on the transition from fully-closed to
 * open.  Note the shared exit: the `bad:' label is reached on both
 * success (error == 0) and failure paths.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	/* Recompute the combined open mask from both modes. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
841
/* ARGSUSED */
/*
 * Close a partition of a raid unit.  Clears the per-mode open bits and
 * recomputes the combined open mask.  On the last close of an
 * initialized unit the component labels are finalized; on the last
 * close of a unit marked RAIDF_SHUTDOWN the unit is detached entirely.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);
	}
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
		/*
		 * Detach this raid unit
		 */
		cfdata_t cf = NULL;
		int retcode = 0;

		if (rs->sc_dev != NULL) {
			cf = device_cfdata(rs->sc_dev);

			/* Drop the unit lock before config_detach(). */
			raidunlock(rs);
			retcode = config_detach(rs->sc_dev, DETACH_QUIET);
			if (retcode == 0)
				/* free the pseudo device attach bits */
				free(cf, M_RAIDFRAME);
		} else {
			/* Never attached: discard the softc directly.
			   No raidunlock() here — raidput() frees rs,
			   so the lock goes away with it. */
			raidput(rs);
		}
		return retcode;
	}

	raidunlock(rs);
	return (0);
}
907
/*
 * Block-device strategy routine.  Validates the request, bounds-checks
 * it against either the raw device size (RAW_PART) or the disklabel
 * (other partitions), then queues the buffer for the RAIDframe I/O
 * thread and wakes it via iodone_cv.  On any error the buffer is
 * completed immediately with b_error set and b_resid == b_bcount.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		/* Zero-length transfer: complete immediately, no error. */
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* Convert totalSectors to DEV_BSIZE units, shifting
		   whichever direction the native sector size requires. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
978
979 /* ARGSUSED */
980 static int
981 raidread(dev_t dev, struct uio *uio, int flags)
982 {
983 int unit = raidunit(dev);
984 struct raid_softc *rs;
985
986 if ((rs = raidget(unit, false)) == NULL)
987 return ENXIO;
988
989 if ((rs->sc_flags & RAIDF_INITED) == 0)
990 return (ENXIO);
991
992 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
993
994 }
995
996 /* ARGSUSED */
997 static int
998 raidwrite(dev_t dev, struct uio *uio, int flags)
999 {
1000 int unit = raidunit(dev);
1001 struct raid_softc *rs;
1002
1003 if ((rs = raidget(unit, false)) == NULL)
1004 return ENXIO;
1005
1006 if ((rs->sc_flags & RAIDF_INITED) == 0)
1007 return (ENXIO);
1008
1009 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1010
1011 }
1012
/*
 * Tear down a raid unit: refuse if any partition is still open, shut
 * down the RAIDframe layer when the unit was initialized, then detach
 * and destroy the disk and free the softc.  Returns EBUSY or the
 * rf_Shutdown() error on failure, 0 on success (rs is gone then).
 * NOTE(review): the "_unlocked" suffix suggests the caller manages the
 * unit lock (raidlock) around this — confirm against the callers.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	/* Free the softc */
	aprint_normal_dev(rs->sc_dev, "detached\n");
	raidput(rs);

	return 0;
}
1046
/*
 * raidioctl: ioctl entry point for the RAIDframe pseudo-device.
 *
 * Dispatches both RAIDframe-specific commands (RAIDFRAME_*) and the
 * standard disk ioctls (DIOC*).  Three gate checks run before the main
 * switch: FWRITE is required for label/wedge/strategy modifications,
 * and most commands require the unit to be RAIDF_INITED.
 *
 * Returns 0 on success or an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	/* false: do not create the softc if the unit does not exist yet */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPARTINFO:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/*
		 * The COMPAT_50 path converts the old config format and
		 * then joins the common "config:" code below.
		 */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * Refuse to shut down while any partition other than the
		 * one this ioctl arrived on is open, or while this one is
		 * open both block and character.
		 */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* cf was allocated in raidinit(); free it only on success */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
		/*
		 * NOTE(review): clabel is not checked for NULL here before
		 * copyin, although other RF_Malloc callers in this function
		 * do check -- confirm RF_Malloc cannot fail in this path.
		 */

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		/* the temporary copy was only needed for the column index */
		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* the rewrite runs asynchronously in its own kthread */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): unimplemented stub -- always returns 0 */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* sanity-check the current disk states under the lock */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* the recon thread frees rrcopy when it is done with it */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* spares are stored in Disks[] after the data columns */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/*
		 * NOTE(review): returns 0 rather than retcode, so a
		 * thread-creation failure is not reported to the caller --
		 * other RF_CREATE_THREAD users here return retcode; confirm
		 * whether this is intentional.
		 */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* WDINFO additionally writes the label to disk */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* swap in the new queue and drain pending bufs into it */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1965
1966
1967 /* raidinit -- complete the rest of the initialization for the
1968 RAIDframe device. */
1969
1970
1971 static void
1972 raidinit(struct raid_softc *rs)
1973 {
1974 cfdata_t cf;
1975 int unit;
1976 RF_Raid_t *raidPtr = &rs->sc_r;
1977
1978 unit = raidPtr->raidid;
1979
1980
1981 /* XXX should check return code first... */
1982 rs->sc_flags |= RAIDF_INITED;
1983
1984 /* XXX doesn't check bounds. */
1985 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1986
1987 /* attach the pseudo device */
1988 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1989 cf->cf_name = raid_cd.cd_name;
1990 cf->cf_atname = raid_cd.cd_name;
1991 cf->cf_unit = unit;
1992 cf->cf_fstate = FSTATE_STAR;
1993
1994 rs->sc_dev = config_attach_pseudo(cf);
1995
1996 if (rs->sc_dev == NULL) {
1997 printf("raid%d: config_attach_pseudo failed\n",
1998 raidPtr->raidid);
1999 rs->sc_flags &= ~RAIDF_INITED;
2000 free(cf, M_RAIDFRAME);
2001 return;
2002 }
2003
2004 /* disk_attach actually creates space for the CPU disklabel, among
2005 * other things, so it's critical to call this *BEFORE* we try putzing
2006 * with disklabels. */
2007
2008 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2009 disk_attach(&rs->sc_dkdev);
2010
2011 /* XXX There may be a weird interaction here between this, and
2012 * protectedSectors, as used in RAIDframe. */
2013
2014 rs->sc_size = raidPtr->totalSectors;
2015
2016 rf_set_geometry(rs, raidPtr);
2017
2018 dkwedge_discover(&rs->sc_dkdev);
2019
2020 }
2021 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2022 /* wake up the daemon & tell it to get us a spare table
2023 * XXX
2024 * the entries in the queues should be tagged with the raidPtr
2025 * so that in the extremely rare case that two recons happen at once,
2026 * we know for which device were requesting a spare table
2027 * XXX
2028 *
2029 * XXX This code is not currently used. GO
2030 */
2031 int
2032 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2033 {
2034 int retcode;
2035
2036 rf_lock_mutex2(rf_sparet_wait_mutex);
2037 req->next = rf_sparet_wait_queue;
2038 rf_sparet_wait_queue = req;
2039 rf_broadcast_cond2(rf_sparet_wait_cv);
2040
2041 /* mpsleep unlocks the mutex */
2042 while (!rf_sparet_resp_queue) {
2043 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2044 }
2045 req = rf_sparet_resp_queue;
2046 rf_sparet_resp_queue = req->next;
2047 rf_unlock_mutex2(rf_sparet_wait_mutex);
2048
2049 retcode = req->fcol;
2050 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2051 * alloc'd */
2052 return (retcode);
2053 }
2054 #endif
2055
2056 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2057 * bp & passes it down.
2058 * any calls originating in the kernel must use non-blocking I/O
2059 * do some extra sanity checking to return "appropriate" error values for
2060 * certain conditions (to make some standard utilities work)
2061 *
2062 * Formerly known as: rf_DoAccessKernel
2063 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock while updating labels; re-take it after */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/*
	 * Check to see if we're at the limit...
	 * Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released for the duration of the buf processing.
	 */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NB: returns with the mutex released */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* bounds check; the extra comparisons catch arithmetic
		 * wrap-around of sum */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a multiple of the sector
		 * size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this I/O */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* completion is reported via biodone() from the DAG layer */
		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2181
2182
2183
2184
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * Dispatch one queued request (req) for the per-component disk queue.
 * RF_IO_TYPE_NOP requests are completed immediately via KernelWakeupFunc();
 * reads and writes are turned into a struct buf and handed to the
 * underlying block driver with bdev_strategy().  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* Fake a completed buf so the normal completion path
		 * (KernelWakeupFunc) handles the NOP like a finished I/O. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Build the buf: completion routes back through
		 * KernelWakeupFunc with req as the private cookie. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/*
 * Completion handler for bufs issued by rf_DispatchKernelIO().  Runs in
 * biodone() context.  On error it may mark the component failed (once,
 * and only if the set can tolerate another failure), then queues the
 * request on raidPtr->iodone and wakes the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/the NOP path. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d).  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2329
2330
2331 /*
2332 * initialize a buf structure for doing an I/O in the kernel.
2333 */
2334 static void
2335 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2336 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2337 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2338 struct proc *b_proc)
2339 {
2340 /* bp->b_flags = B_PHYS | rw_flag; */
2341 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2342 bp->b_oflags = 0;
2343 bp->b_cflags = 0;
2344 bp->b_bcount = numSect << logBytesPerSector;
2345 bp->b_bufsize = bp->b_bcount;
2346 bp->b_error = 0;
2347 bp->b_dev = dev;
2348 bp->b_data = bf;
2349 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2350 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2351 if (bp->b_bcount == 0) {
2352 panic("bp->b_bcount is zero in InitBP!!");
2353 }
2354 bp->b_proc = b_proc;
2355 bp->b_iodone = cbFunc;
2356 bp->b_private = cbArg;
2357 }
2358
2359 static void
2360 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2361 struct disklabel *lp)
2362 {
2363 memset(lp, 0, sizeof(*lp));
2364
2365 /* fabricate a label... */
2366 if (raidPtr->totalSectors > UINT32_MAX)
2367 lp->d_secperunit = UINT32_MAX;
2368 else
2369 lp->d_secperunit = raidPtr->totalSectors;
2370 lp->d_secsize = raidPtr->bytesPerSector;
2371 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2372 lp->d_ntracks = 4 * raidPtr->numCol;
2373 lp->d_ncylinders = raidPtr->totalSectors /
2374 (lp->d_nsectors * lp->d_ntracks);
2375 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2376
2377 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2378 lp->d_type = DKTYPE_RAID;
2379 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2380 lp->d_rpm = 3600;
2381 lp->d_interleave = 1;
2382 lp->d_flags = 0;
2383
2384 lp->d_partitions[RAW_PART].p_offset = 0;
2385 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2386 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2387 lp->d_npartitions = RAW_PART + 1;
2388
2389 lp->d_magic = DISKMAGIC;
2390 lp->d_magic2 = DISKMAGIC;
2391 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2392
2393 }
2394 /*
2395 * Read the disklabel from the raid device. If one is not present, fake one
2396 * up.
2397 */
2398 static void
2399 raidgetdisklabel(dev_t dev)
2400 {
2401 int unit = raidunit(dev);
2402 struct raid_softc *rs;
2403 const char *errstring;
2404 struct disklabel *lp;
2405 struct cpu_disklabel *clp;
2406 RF_Raid_t *raidPtr;
2407
2408 if ((rs = raidget(unit, false)) == NULL)
2409 return;
2410
2411 lp = rs->sc_dkdev.dk_label;
2412 clp = rs->sc_dkdev.dk_cpulabel;
2413
2414 db1_printf(("Getting the disklabel...\n"));
2415
2416 memset(clp, 0, sizeof(*clp));
2417
2418 raidPtr = &rs->sc_r;
2419
2420 raidgetdefaultlabel(raidPtr, rs, lp);
2421
2422 /*
2423 * Call the generic disklabel extraction routine.
2424 */
2425 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2426 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2427 if (errstring)
2428 raidmakedisklabel(rs);
2429 else {
2430 int i;
2431 struct partition *pp;
2432
2433 /*
2434 * Sanity check whether the found disklabel is valid.
2435 *
2436 * This is necessary since total size of the raid device
2437 * may vary when an interleave is changed even though exactly
2438 * same components are used, and old disklabel may used
2439 * if that is found.
2440 */
2441 if (lp->d_secperunit < UINT32_MAX ?
2442 lp->d_secperunit != rs->sc_size :
2443 lp->d_secperunit > rs->sc_size)
2444 printf("raid%d: WARNING: %s: "
2445 "total sector size in disklabel (%ju) != "
2446 "the size of raid (%ju)\n", unit, rs->sc_xname,
2447 (uintmax_t)lp->d_secperunit,
2448 (uintmax_t)rs->sc_size);
2449 for (i = 0; i < lp->d_npartitions; i++) {
2450 pp = &lp->d_partitions[i];
2451 if (pp->p_offset + pp->p_size > rs->sc_size)
2452 printf("raid%d: WARNING: %s: end of partition `%c' "
2453 "exceeds the size of raid (%ju)\n",
2454 unit, rs->sc_xname, 'a' + i,
2455 (uintmax_t)rs->sc_size);
2456 }
2457 }
2458
2459 }
2460 /*
2461 * Take care of things one might want to take care of in the event
2462 * that a disklabel isn't present.
2463 */
2464 static void
2465 raidmakedisklabel(struct raid_softc *rs)
2466 {
2467 struct disklabel *lp = rs->sc_dkdev.dk_label;
2468 db1_printf(("Making a label..\n"));
2469
2470 /*
2471 * For historical reasons, if there's no disklabel present
2472 * the raw partition must be marked FS_BSDFFS.
2473 */
2474
2475 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2476
2477 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2478
2479 lp->d_checksum = dkcksum(lp);
2480 }
2481 /*
2482 * Wait interruptibly for an exclusive lock.
2483 *
2484 * XXX
2485 * Several drivers do this; it should be abstracted and made MP-safe.
2486 * (Hmm... where have we seen this warning before :-> GO )
2487 */
2488 static int
2489 raidlock(struct raid_softc *rs)
2490 {
2491 int error;
2492
2493 mutex_enter(&rs->sc_mutex);
2494 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2495 rs->sc_flags |= RAIDF_WANTED;
2496 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2497 if (error != 0)
2498 return (error);
2499 }
2500 rs->sc_flags |= RAIDF_LOCKED;
2501 mutex_exit(&rs->sc_mutex);
2502 return (0);
2503 }
2504 /*
2505 * Unlock and wake up any waiters.
2506 */
2507 static void
2508 raidunlock(struct raid_softc *rs)
2509 {
2510
2511 mutex_enter(&rs->sc_mutex);
2512 rs->sc_flags &= ~RAIDF_LOCKED;
2513 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2514 rs->sc_flags &= ~RAIDF_WANTED;
2515 cv_broadcast(&rs->sc_cv);
2516 }
2517 mutex_exit(&rs->sc_mutex);
2518 }
2519
2520
2521 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2522 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2523 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2524
/* Byte offset of the component label area on each component disk. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2531
2532 static daddr_t
2533 rf_component_info_size(unsigned secsize)
2534 {
2535 daddr_t info_size;
2536
2537 KASSERT(secsize);
2538 if (secsize > RF_COMPONENT_INFO_SIZE)
2539 info_size = secsize;
2540 else
2541 info_size = RF_COMPONENT_INFO_SIZE;
2542
2543 return info_size;
2544 }
2545
2546 static daddr_t
2547 rf_parity_map_offset(RF_Raid_t *raidPtr)
2548 {
2549 daddr_t map_offset;
2550
2551 KASSERT(raidPtr->bytesPerSector);
2552 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2553 map_offset = raidPtr->bytesPerSector;
2554 else
2555 map_offset = RF_COMPONENT_INFO_SIZE;
2556 map_offset += rf_component_info_offset();
2557
2558 return map_offset;
2559 }
2560
2561 static daddr_t
2562 rf_parity_map_size(RF_Raid_t *raidPtr)
2563 {
2564 daddr_t map_size;
2565
2566 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2567 map_size = raidPtr->bytesPerSector;
2568 else
2569 map_size = RF_PARITY_MAP_SIZE;
2570
2571 return map_size;
2572 }
2573
2574 int
2575 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2576 {
2577 RF_ComponentLabel_t *clabel;
2578
2579 clabel = raidget_component_label(raidPtr, col);
2580 clabel->clean = RF_RAID_CLEAN;
2581 raidflush_component_label(raidPtr, col);
2582 return(0);
2583 }
2584
2585
2586 int
2587 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2588 {
2589 RF_ComponentLabel_t *clabel;
2590
2591 clabel = raidget_component_label(raidPtr, col);
2592 clabel->clean = RF_RAID_DIRTY;
2593 raidflush_component_label(raidPtr, col);
2594 return(0);
2595 }
2596
2597 int
2598 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2599 {
2600 KASSERT(raidPtr->bytesPerSector);
2601 return raidread_component_label(raidPtr->bytesPerSector,
2602 raidPtr->Disks[col].dev,
2603 raidPtr->raid_cinfo[col].ci_vp,
2604 &raidPtr->raid_cinfo[col].ci_label);
2605 }
2606
2607 RF_ComponentLabel_t *
2608 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2609 {
2610 return &raidPtr->raid_cinfo[col].ci_label;
2611 }
2612
2613 int
2614 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2615 {
2616 RF_ComponentLabel_t *label;
2617
2618 label = &raidPtr->raid_cinfo[col].ci_label;
2619 label->mod_counter = raidPtr->mod_counter;
2620 #ifndef RF_NO_PARITY_MAP
2621 label->parity_map_modcount = label->mod_counter;
2622 #endif
2623 return raidwrite_component_label(raidPtr->bytesPerSector,
2624 raidPtr->Disks[col].dev,
2625 raidPtr->raid_cinfo[col].ci_vp, label);
2626 }
2627
2628
2629 static int
2630 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2631 RF_ComponentLabel_t *clabel)
2632 {
2633 return raidread_component_area(dev, b_vp, clabel,
2634 sizeof(RF_ComponentLabel_t),
2635 rf_component_info_offset(),
2636 rf_component_info_size(secsize));
2637 }
2638
2639 /* ARGSUSED */
2640 static int
2641 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2642 size_t msize, daddr_t offset, daddr_t dsize)
2643 {
2644 struct buf *bp;
2645 const struct bdevsw *bdev;
2646 int error;
2647
2648 /* XXX should probably ensure that we don't try to do this if
2649 someone has changed rf_protected_sectors. */
2650
2651 if (b_vp == NULL) {
2652 /* For whatever reason, this component is not valid.
2653 Don't try to read a component label from it. */
2654 return(EINVAL);
2655 }
2656
2657 /* get a block of the appropriate size... */
2658 bp = geteblk((int)dsize);
2659 bp->b_dev = dev;
2660
2661 /* get our ducks in a row for the read */
2662 bp->b_blkno = offset / DEV_BSIZE;
2663 bp->b_bcount = dsize;
2664 bp->b_flags |= B_READ;
2665 bp->b_resid = dsize;
2666
2667 bdev = bdevsw_lookup(bp->b_dev);
2668 if (bdev == NULL)
2669 return (ENXIO);
2670 (*bdev->d_strategy)(bp);
2671
2672 error = biowait(bp);
2673
2674 if (!error) {
2675 memcpy(data, bp->b_data, msize);
2676 }
2677
2678 brelse(bp, 0);
2679 return(error);
2680 }
2681
2682
2683 static int
2684 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2685 RF_ComponentLabel_t *clabel)
2686 {
2687 return raidwrite_component_area(dev, b_vp, clabel,
2688 sizeof(RF_ComponentLabel_t),
2689 rf_component_info_offset(),
2690 rf_component_info_size(secsize), 0);
2691 }
2692
2693 /* ARGSUSED */
2694 static int
2695 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2696 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2697 {
2698 struct buf *bp;
2699 const struct bdevsw *bdev;
2700 int error;
2701
2702 /* get a block of the appropriate size... */
2703 bp = geteblk((int)dsize);
2704 bp->b_dev = dev;
2705
2706 /* get our ducks in a row for the write */
2707 bp->b_blkno = offset / DEV_BSIZE;
2708 bp->b_bcount = dsize;
2709 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2710 bp->b_resid = dsize;
2711
2712 memset(bp->b_data, 0, dsize);
2713 memcpy(bp->b_data, data, msize);
2714
2715 bdev = bdevsw_lookup(bp->b_dev);
2716 if (bdev == NULL)
2717 return (ENXIO);
2718 (*bdev->d_strategy)(bp);
2719 if (asyncp)
2720 return 0;
2721 error = biowait(bp);
2722 brelse(bp, 0);
2723 if (error) {
2724 #if 1
2725 printf("Failed to write RAID component info!\n");
2726 #endif
2727 }
2728
2729 return(error);
2730 }
2731
2732 void
2733 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2734 {
2735 int c;
2736
2737 for (c = 0; c < raidPtr->numCol; c++) {
2738 /* Skip dead disks. */
2739 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2740 continue;
2741 /* XXXjld: what if an error occurs here? */
2742 raidwrite_component_area(raidPtr->Disks[c].dev,
2743 raidPtr->raid_cinfo[c].ci_vp, map,
2744 RF_PARITYMAP_NBYTE,
2745 rf_parity_map_offset(raidPtr),
2746 rf_parity_map_size(raidPtr), 0);
2747 }
2748 }
2749
2750 void
2751 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2752 {
2753 struct rf_paritymap_ondisk tmp;
2754 int c,first;
2755
2756 first=1;
2757 for (c = 0; c < raidPtr->numCol; c++) {
2758 /* Skip dead disks. */
2759 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2760 continue;
2761 raidread_component_area(raidPtr->Disks[c].dev,
2762 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2763 RF_PARITYMAP_NBYTE,
2764 rf_parity_map_offset(raidPtr),
2765 rf_parity_map_size(raidPtr));
2766 if (first) {
2767 memcpy(map, &tmp, sizeof(*map));
2768 first = 0;
2769 } else {
2770 rf_paritymap_merge(map, &tmp);
2771 }
2772 }
2773 }
2774
/*
 * Bump the array's modification counter and mark the component label of
 * every live component (and every in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this
			   spare, scol keeps its previous value (-1 or a
			   stale match) — confirm that cannot happen for
			   an rf_ds_used_spare disk. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2834
2835
/*
 * Refresh the on-disk component labels of all optimal components and
 * in-use spares.  When 'final' is RF_FINAL_COMPONENT_UPDATE and parity
 * is known clean, the clean bit is also written (shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this
			   spare, scol retains its prior value — confirm
			   that cannot happen for rf_ds_used_spare. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2910
2911 void
2912 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2913 {
2914
2915 if (vp != NULL) {
2916 if (auto_configured == 1) {
2917 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2918 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2919 vput(vp);
2920
2921 } else {
2922 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2923 }
2924 }
2925 }
2926
2927
2928 void
2929 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2930 {
2931 int r,c;
2932 struct vnode *vp;
2933 int acd;
2934
2935
2936 /* We take this opportunity to close the vnodes like we should.. */
2937
2938 for (c = 0; c < raidPtr->numCol; c++) {
2939 vp = raidPtr->raid_cinfo[c].ci_vp;
2940 acd = raidPtr->Disks[c].auto_configured;
2941 rf_close_component(raidPtr, vp, acd);
2942 raidPtr->raid_cinfo[c].ci_vp = NULL;
2943 raidPtr->Disks[c].auto_configured = 0;
2944 }
2945
2946 for (r = 0; r < raidPtr->numSpare; r++) {
2947 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2948 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2949 rf_close_component(raidPtr, vp, acd);
2950 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2951 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2952 }
2953 }
2954
2955
2956 void
2957 rf_ReconThread(struct rf_recon_req *req)
2958 {
2959 int s;
2960 RF_Raid_t *raidPtr;
2961
2962 s = splbio();
2963 raidPtr = (RF_Raid_t *) req->raidPtr;
2964 raidPtr->recon_in_progress = 1;
2965
2966 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2967 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2968
2969 RF_Free(req, sizeof(*req));
2970
2971 raidPtr->recon_in_progress = 0;
2972 splx(s);
2973
2974 /* That's all... */
2975 kthread_exit(0); /* does not return */
2976 }
2977
2978 void
2979 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2980 {
2981 int retcode;
2982 int s;
2983
2984 raidPtr->parity_rewrite_stripes_done = 0;
2985 raidPtr->parity_rewrite_in_progress = 1;
2986 s = splbio();
2987 retcode = rf_RewriteParity(raidPtr);
2988 splx(s);
2989 if (retcode) {
2990 printf("raid%d: Error re-writing parity (%d)!\n",
2991 raidPtr->raidid, retcode);
2992 } else {
2993 /* set the clean bit! If we shutdown correctly,
2994 the clean bit on each component label will get
2995 set */
2996 raidPtr->parity_good = RF_RAID_CLEAN;
2997 }
2998 raidPtr->parity_rewrite_in_progress = 0;
2999
3000 /* Anyone waiting for us to stop? If so, inform them... */
3001 if (raidPtr->waitShutdown) {
3002 wakeup(&raidPtr->parity_rewrite_in_progress);
3003 }
3004
3005 /* That's all... */
3006 kthread_exit(0); /* does not return */
3007 }
3008
3009
3010 void
3011 rf_CopybackThread(RF_Raid_t *raidPtr)
3012 {
3013 int s;
3014
3015 raidPtr->copyback_in_progress = 1;
3016 s = splbio();
3017 rf_CopybackReconstructedData(raidPtr);
3018 splx(s);
3019 raidPtr->copyback_in_progress = 0;
3020
3021 /* That's all... */
3022 kthread_exit(0); /* does not return */
3023 }
3024
3025
3026 void
3027 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3028 {
3029 int s;
3030 RF_Raid_t *raidPtr;
3031
3032 s = splbio();
3033 raidPtr = req->raidPtr;
3034 raidPtr->recon_in_progress = 1;
3035 rf_ReconstructInPlace(raidPtr, req->col);
3036 RF_Free(req, sizeof(*req));
3037 raidPtr->recon_in_progress = 0;
3038 splx(s);
3039
3040 /* That's all... */
3041 kthread_exit(0); /* does not return */
3042 }
3043
/*
 * Try to read a component label from (dev, vp).  If the label is
 * plausible, prepend a new RF_AutoConfig_t for it to ac_list and keep
 * the vnode open; otherwise close/release the vnode.  On allocation
 * failure the entire ac_list is freed and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: tear down everything collected so
		       far, labels included. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3101
/*
 * Scan every disk-class device in the system for RAIDframe components:
 * wedges with the RAIDFRAME partition type, disklabel partitions of
 * type FS_RAID, and (failing both) the raw partition itself.  Returns
 * a linked list of RF_AutoConfig_t candidates (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedge devices: check the partition type via
			   DIOCGWEDGEINFO instead of a disklabel. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3283
3284
3285 int
3286 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3287 {
3288
3289 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3290 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3291 ((clabel->clean == RF_RAID_CLEAN) ||
3292 (clabel->clean == RF_RAID_DIRTY)) &&
3293 clabel->row >=0 &&
3294 clabel->column >= 0 &&
3295 clabel->num_rows > 0 &&
3296 clabel->num_columns > 0 &&
3297 clabel->row < clabel->num_rows &&
3298 clabel->column < clabel->num_columns &&
3299 clabel->blockSize > 0 &&
3300 /*
3301 * numBlocksHi may contain garbage, but it is ok since
3302 * the type is unsigned. If it is really garbage,
3303 * rf_fix_old_label_size() will fix it.
3304 */
3305 rf_component_label_numblocks(clabel) > 0) {
3306 /*
3307 * label looks reasonable enough...
3308 * let's make sure it has no old garbage.
3309 */
3310 if (numsecs)
3311 rf_fix_old_label_size(clabel, numsecs);
3312 return(1);
3313 }
3314 return(0);
3315 }
3316
3317
3318 /*
3319 * For reasons yet unknown, some old component labels have garbage in
3320 * the newer numBlocksHi region, and this causes lossage. Since those
3321 * disks will also have numsecs set to less than 32 bits of sectors,
3322 * we can determine when this corruption has occurred, and fix it.
3323 *
3324 * The exact same problem, with the same unknown reason, happens to
3325 * the partitionSizeHi member as well.
3326 */
3327 static void
3328 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3329 {
3330
3331 if (numsecs < ((uint64_t)1 << 32)) {
3332 if (clabel->numBlocksHi) {
3333 printf("WARNING: total sectors < 32 bits, yet "
3334 "numBlocksHi set\n"
3335 "WARNING: resetting numBlocksHi to zero.\n");
3336 clabel->numBlocksHi = 0;
3337 }
3338
3339 if (clabel->partitionSizeHi) {
3340 printf("WARNING: total sectors < 32 bits, yet "
3341 "partitionSizeHi set\n"
3342 "WARNING: resetting partitionSizeHi to zero.\n");
3343 clabel->partitionSizeHi = 0;
3344 }
3345 }
3346 }
3347
3348
#ifdef DEBUG
/*
 * Pretty-print the contents of a component label to the console.
 * Compiled in debug kernels only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* index 0..2 map to root_partition values; 3 is out-of-range */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* mask to 2 bits so an out-of-range value can't index past rp[] */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3382
3383 RF_ConfigSet_t *
3384 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3385 {
3386 RF_AutoConfig_t *ac;
3387 RF_ConfigSet_t *config_sets;
3388 RF_ConfigSet_t *cset;
3389 RF_AutoConfig_t *ac_next;
3390
3391
3392 config_sets = NULL;
3393
3394 /* Go through the AutoConfig list, and figure out which components
3395 belong to what sets. */
3396 ac = ac_list;
3397 while(ac!=NULL) {
3398 /* we're going to putz with ac->next, so save it here
3399 for use at the end of the loop */
3400 ac_next = ac->next;
3401
3402 if (config_sets == NULL) {
3403 /* will need at least this one... */
3404 config_sets = (RF_ConfigSet_t *)
3405 malloc(sizeof(RF_ConfigSet_t),
3406 M_RAIDFRAME, M_NOWAIT);
3407 if (config_sets == NULL) {
3408 panic("rf_create_auto_sets: No memory!");
3409 }
3410 /* this one is easy :) */
3411 config_sets->ac = ac;
3412 config_sets->next = NULL;
3413 config_sets->rootable = 0;
3414 ac->next = NULL;
3415 } else {
3416 /* which set does this component fit into? */
3417 cset = config_sets;
3418 while(cset!=NULL) {
3419 if (rf_does_it_fit(cset, ac)) {
3420 /* looks like it matches... */
3421 ac->next = cset->ac;
3422 cset->ac = ac;
3423 break;
3424 }
3425 cset = cset->next;
3426 }
3427 if (cset==NULL) {
3428 /* didn't find a match above... new set..*/
3429 cset = (RF_ConfigSet_t *)
3430 malloc(sizeof(RF_ConfigSet_t),
3431 M_RAIDFRAME, M_NOWAIT);
3432 if (cset == NULL) {
3433 panic("rf_create_auto_sets: No memory!");
3434 }
3435 cset->ac = ac;
3436 ac->next = NULL;
3437 cset->next = config_sets;
3438 cset->rootable = 0;
3439 config_sets = cset;
3440 }
3441 }
3442 ac = ac_next;
3443 }
3444
3445
3446 return(config_sets);
3447 }
3448
3449 static int
3450 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3451 {
3452 RF_ComponentLabel_t *clabel1, *clabel2;
3453
3454 /* If this one matches the *first* one in the set, that's good
3455 enough, since the other members of the set would have been
3456 through here too... */
3457 /* note that we are not checking partitionSize here..
3458
3459 Note that we are also not checking the mod_counters here.
3460 If everything else matches except the mod_counter, that's
3461 good enough for this test. We will deal with the mod_counters
3462 a little later in the autoconfiguration process.
3463
3464 (clabel1->mod_counter == clabel2->mod_counter) &&
3465
3466 The reason we don't check for this is that failed disks
3467 will have lower modification counts. If those disks are
3468 not added to the set they used to belong to, then they will
3469 form their own set, which may result in 2 different sets,
3470 for example, competing to be configured at raid0, and
3471 perhaps competing to be the root filesystem set. If the
3472 wrong ones get configured, or both attempt to become /,
3473 weird behaviour and or serious lossage will occur. Thus we
3474 need to bring them into the fold here, and kick them out at
3475 a later point.
3476
3477 */
3478
3479 clabel1 = cset->ac->clabel;
3480 clabel2 = ac->clabel;
3481 if ((clabel1->version == clabel2->version) &&
3482 (clabel1->serial_number == clabel2->serial_number) &&
3483 (clabel1->num_rows == clabel2->num_rows) &&
3484 (clabel1->num_columns == clabel2->num_columns) &&
3485 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3486 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3487 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3488 (clabel1->parityConfig == clabel2->parityConfig) &&
3489 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3490 (clabel1->blockSize == clabel2->blockSize) &&
3491 rf_component_label_numblocks(clabel1) ==
3492 rf_component_label_numblocks(clabel2) &&
3493 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3494 (clabel1->root_partition == clabel2->root_partition) &&
3495 (clabel1->last_unit == clabel2->last_unit) &&
3496 (clabel1->config_order == clabel2->config_order)) {
3497 /* if it get's here, it almost *has* to be a match */
3498 } else {
3499 /* it's not consistent with somebody in the set..
3500 punt */
3501 return(0);
3502 }
3503 /* all was fine.. it must fit... */
3504 return(1);
3505 }
3506
/*
 * Decide whether configuration set 'cset' has enough live components
 * to be configured.  Returns 1 if the set may be brought up, 0 if too
 * many components are missing for its parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}
	/* mod_counter now holds the largest modification count seen in
	   the set; components with a smaller count are considered stale
	   (they failed at some earlier point) and don't count as live. */

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component with the current
	   mod_counter; a column with no such component is missing. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished an even/odd mirror pair without
			   bailing.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one.
	   (RAID 1 was fully handled by the pair logic above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3609
3610 void
3611 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3612 RF_Raid_t *raidPtr)
3613 {
3614 RF_ComponentLabel_t *clabel;
3615 int i;
3616
3617 clabel = ac->clabel;
3618
3619 /* 1. Fill in the common stuff */
3620 config->numRow = clabel->num_rows = 1;
3621 config->numCol = clabel->num_columns;
3622 config->numSpare = 0; /* XXX should this be set here? */
3623 config->sectPerSU = clabel->sectPerSU;
3624 config->SUsPerPU = clabel->SUsPerPU;
3625 config->SUsPerRU = clabel->SUsPerRU;
3626 config->parityConfig = clabel->parityConfig;
3627 /* XXX... */
3628 strcpy(config->diskQueueType,"fifo");
3629 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3630 config->layoutSpecificSize = 0; /* XXX ?? */
3631
3632 while(ac!=NULL) {
3633 /* row/col values will be in range due to the checks
3634 in reasonable_label() */
3635 strcpy(config->devnames[0][ac->clabel->column],
3636 ac->devname);
3637 ac = ac->next;
3638 }
3639
3640 for(i=0;i<RF_MAXDBGV;i++) {
3641 config->debugVars[i][0] = 0;
3642 }
3643 }
3644
3645 int
3646 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3647 {
3648 RF_ComponentLabel_t *clabel;
3649 int column;
3650 int sparecol;
3651
3652 raidPtr->autoconfigure = new_value;
3653
3654 for(column=0; column<raidPtr->numCol; column++) {
3655 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3656 clabel = raidget_component_label(raidPtr, column);
3657 clabel->autoconfigure = new_value;
3658 raidflush_component_label(raidPtr, column);
3659 }
3660 }
3661 for(column = 0; column < raidPtr->numSpare ; column++) {
3662 sparecol = raidPtr->numCol + column;
3663 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3664 clabel = raidget_component_label(raidPtr, sparecol);
3665 clabel->autoconfigure = new_value;
3666 raidflush_component_label(raidPtr, sparecol);
3667 }
3668 }
3669 return(new_value);
3670 }
3671
3672 int
3673 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3674 {
3675 RF_ComponentLabel_t *clabel;
3676 int column;
3677 int sparecol;
3678
3679 raidPtr->root_partition = new_value;
3680 for(column=0; column<raidPtr->numCol; column++) {
3681 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3682 clabel = raidget_component_label(raidPtr, column);
3683 clabel->root_partition = new_value;
3684 raidflush_component_label(raidPtr, column);
3685 }
3686 }
3687 for(column = 0; column < raidPtr->numSpare ; column++) {
3688 sparecol = raidPtr->numCol + column;
3689 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3690 clabel = raidget_component_label(raidPtr, sparecol);
3691 clabel->root_partition = new_value;
3692 raidflush_component_label(raidPtr, sparecol);
3693 }
3694 }
3695 return(new_value);
3696 }
3697
3698 void
3699 rf_release_all_vps(RF_ConfigSet_t *cset)
3700 {
3701 RF_AutoConfig_t *ac;
3702
3703 ac = cset->ac;
3704 while(ac!=NULL) {
3705 /* Close the vp, and give it back */
3706 if (ac->vp) {
3707 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3708 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3709 vput(ac->vp);
3710 ac->vp = NULL;
3711 }
3712 ac = ac->next;
3713 }
3714 }
3715
3716
3717 void
3718 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3719 {
3720 RF_AutoConfig_t *ac;
3721 RF_AutoConfig_t *next_ac;
3722
3723 ac = cset->ac;
3724 while(ac!=NULL) {
3725 next_ac = ac->next;
3726 /* nuke the label */
3727 free(ac->clabel, M_RAIDFRAME);
3728 /* cleanup the config structure */
3729 free(ac, M_RAIDFRAME);
3730 /* "next.." */
3731 ac = next_ac;
3732 }
3733 /* and, finally, nuke the config set */
3734 free(cset, M_RAIDFRAME);
3735 }
3736
3737
3738 void
3739 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3740 {
3741 /* current version number */
3742 clabel->version = RF_COMPONENT_LABEL_VERSION;
3743 clabel->serial_number = raidPtr->serial_number;
3744 clabel->mod_counter = raidPtr->mod_counter;
3745
3746 clabel->num_rows = 1;
3747 clabel->num_columns = raidPtr->numCol;
3748 clabel->clean = RF_RAID_DIRTY; /* not clean */
3749 clabel->status = rf_ds_optimal; /* "It's good!" */
3750
3751 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3752 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3753 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3754
3755 clabel->blockSize = raidPtr->bytesPerSector;
3756 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3757
3758 /* XXX not portable */
3759 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3760 clabel->maxOutstanding = raidPtr->maxOutstanding;
3761 clabel->autoconfigure = raidPtr->autoconfigure;
3762 clabel->root_partition = raidPtr->root_partition;
3763 clabel->last_unit = raidPtr->raidid;
3764 clabel->config_order = raidPtr->config_order;
3765
3766 #ifndef RF_NO_PARITY_MAP
3767 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3768 #endif
3769 }
3770
/*
 * Configure a RAID set from an autoconfiguration component set.
 * Returns the softc of the newly configured unit, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk forward from the preferred unit until we find one that
	   either doesn't exist yet (sc == NULL) or isn't configured
	   (valid == 0) */
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no softc existed for that unit yet; create one now */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; give the softc back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3854
3855 void
3856 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3857 {
3858 struct buf *bp;
3859 struct raid_softc *rs;
3860
3861 bp = (struct buf *)desc->bp;
3862 rs = desc->raidPtr->softc;
3863 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3864 (bp->b_flags & B_READ));
3865 }
3866
/*
 * Initialize one of the RAIDframe resource pools with low/high
 * watermarks of xmin/xmax, pre-allocating xmin items up front.
 * 'w_chan' is the wait channel name shown while sleeping on the pool.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);	/* pre-allocate xmin items */
	pool_setlowat(p, xmin);
}
3876
3877 /*
3878 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3879 * if there is IO pending and if that IO could possibly be done for a
3880 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3881 * otherwise.
3882 *
3883 */
3884
3885 int
3886 rf_buf_queue_check(RF_Raid_t *raidPtr)
3887 {
3888 struct raid_softc *rs = raidPtr->softc;
3889 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3890 /* there is work to do */
3891 return 0;
3892 }
3893 /* default is nothing to do */
3894 return 1;
3895 }
3896
3897 int
3898 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3899 {
3900 uint64_t numsecs;
3901 unsigned secsize;
3902 int error;
3903
3904 error = getdisksize(vp, &numsecs, &secsize);
3905 if (error == 0) {
3906 diskPtr->blockSize = secsize;
3907 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3908 diskPtr->partitionSize = numsecs;
3909 return 0;
3910 }
3911 return error;
3912 }
3913
/*
 * Autoconfiguration match hook: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3919
/*
 * Autoconfiguration attach hook: intentionally empty — a raid unit is
 * set up elsewhere (see rf_auto_config_set() / raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3925
3926
3927 static int
3928 raid_detach(device_t self, int flags)
3929 {
3930 int error;
3931 struct raid_softc *rs = raidget(device_unit(self), false);
3932
3933 if (rs == NULL)
3934 return ENXIO;
3935
3936 if ((error = raidlock(rs)) != 0)
3937 return (error);
3938
3939 error = raid_detach_unlocked(rs);
3940
3941 if (error != 0)
3942 raidunlock(rs);
3943
3944 return error;
3945 }
3946
/*
 * Fill in the (synthetic) disk geometry of the RAID pseudo-disk from
 * the RAIDframe configuration and hand it to the disk(9) layer.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/* one "track" is a stripe's worth of data sectors */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated value; cylinders are left zero here — presumably
	   derived downstream by disk_set_info() — TODO confirm */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3961
3962 /*
3963 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3964 * We end up returning whatever error was returned by the first cache flush
3965 * that fails.
3966 */
3967
3968 int
3969 rf_sync_component_caches(RF_Raid_t *raidPtr)
3970 {
3971 int c, sparecol;
3972 int e,error;
3973 int force = 1;
3974
3975 error = 0;
3976 for (c = 0; c < raidPtr->numCol; c++) {
3977 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3978 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3979 &force, FWRITE, NOCRED);
3980 if (e) {
3981 if (e != ENODEV)
3982 printf("raid%d: cache flush to component %s failed.\n",
3983 raidPtr->raidid, raidPtr->Disks[c].devname);
3984 if (error == 0) {
3985 error = e;
3986 }
3987 }
3988 }
3989 }
3990
3991 for( c = 0; c < raidPtr->numSpare ; c++) {
3992 sparecol = raidPtr->numCol + c;
3993 /* Need to ensure that the reconstruct actually completed! */
3994 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3995 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3996 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3997 if (e) {
3998 if (e != ENODEV)
3999 printf("raid%d: cache flush to component %s failed.\n",
4000 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
4001 if (error == 0) {
4002 error = e;
4003 }
4004 }
4005 }
4006 }
4007 return error;
4008 }
4009
4010 /*
4011 * Module interface
4012 */
4013
4014 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
4015
4016 #ifdef _MODULE
4017 CFDRIVER_DECL(raid, DV_DISK, NULL);
4018 #endif
4019
4020 static int raid_modcmd(modcmd_t, void *);
4021 static int raid_modcmd_init(void);
4022 static int raid_modcmd_fini(void);
4023
4024 static int
4025 raid_modcmd(modcmd_t cmd, void *data)
4026 {
4027 int error;
4028
4029 error = 0;
4030 switch (cmd) {
4031 case MODULE_CMD_INIT:
4032 error = raid_modcmd_init();
4033 break;
4034 case MODULE_CMD_FINI:
4035 error = raid_modcmd_fini();
4036 break;
4037 default:
4038 error = ENOTTY;
4039 break;
4040 }
4041 return error;
4042 }
4043
/*
 * Module initialization: attach the device switch and autoconf glue,
 * boot the RAIDframe core, and register a finalizer to autoconfigure
 * RAID sets once all real hardware devices have been found.
 * On any attach failure, earlier attachments are unwound in reverse
 * order before returning the error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to allocate the majors; EEXIST means the
	   devsw was already attached (e.g. compiled in) and is fine */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* unwind the devsw attach above */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* unwind in reverse order of attachment */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is necessarily 0 at this point — every
	   failure path above returned early — so this check is
	   redundant but harmless */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* non-fatal: the module loads, autoconfig just won't run */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
4114
/*
 * Module teardown: refuse to unload while any raid unit exists, then
 * detach the autoconf glue and device switch in reverse order of
 * attachment, rolling back already-completed detaches if a later step
 * fails so the module stays in a consistent state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		/* roll back the cfattach detach above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		/* roll back both earlier detaches */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* shut down the RAIDframe core */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4161