/*	$NetBSD: rf_netbsdkintf.c,v 1.331 2016/01/02 16:00:01 mlelstv Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.331 2016/01/02 16:00:01 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #include "ioconf.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 struct raid_softc;
183 static void raidinit(struct raid_softc *);
184
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 static dev_type_open(raidopen);
201 static dev_type_close(raidclose);
202 static dev_type_read(raidread);
203 static dev_type_write(raidwrite);
204 static dev_type_ioctl(raidioctl);
205 static dev_type_strategy(raidstrategy);
206 static dev_type_dump(raiddump);
207 static dev_type_size(raidsize);
208
/* Block-device switch: entry points for /dev/raid* block nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,		/* crash-dump support (RAID 1 only) */
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
219
/* Character-device switch: raw (/dev/rraid*) access via physio. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
234
/* Hooks handed to the generic disk(9) layer via disk_attach(). */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
239
/*
 * Per-unit software state.  One of these exists for every configured
 * (or being-configured) RAID set; they are linked on the global
 * 'raids' list protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int sc_unit;			/* raid unit number */
	int sc_flags;			/* flags */
	int sc_cflags;			/* configuration flags */
	kmutex_t sc_mutex;		/* interlock mutex */
	kcondvar_t sc_cv;		/* and the condvar */
	uint64_t sc_size;		/* size of the raid device */
	char sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* RAIDframe state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global 'raids' list */
};
254 /* sc_flags */
255 #define RAIDF_INITED 0x01 /* unit has been initialized */
256 #define RAIDF_WLABEL 0x02 /* label area is writable */
257 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
258 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
259 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
260 #define RAIDF_LOCKED 0x80 /* unit is locked */
261
262 #define raidunit(x) DISKUNIT(x)
263
264 extern struct cfdriver raid_cd;
265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
266 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
267 DVF_DETACH_SHUTDOWN);
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static int raid_detach_unlocked(struct raid_softc *);
304
305 static void rf_markalldirty(RF_Raid_t *);
306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(device_t);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
327
328 /*
329 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
330 * Note that this is overridden by having RAID_AUTOCONFIG as an option
331 * in the kernel config file.
332 */
333 #ifdef RAID_AUTOCONFIG
334 int raidautoconfig = 1;
335 #else
336 int raidautoconfig = 0;
337 #endif
338 static bool raidautoconfigdone = false;
339
340 struct RF_Pools_s rf_pools;
341
342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
343 static kmutex_t raid_lock;
344
345 static struct raid_softc *
346 raidcreate(int unit) {
347 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
348 if (sc == NULL) {
349 #ifdef DIAGNOSTIC
350 printf("%s: out of memory\n", __func__);
351 #endif
352 return NULL;
353 }
354 sc->sc_unit = unit;
355 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
356 cv_init(&sc->sc_cv, "raidunit");
357 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
358 return sc;
359 }
360
361 static void
362 raiddestroy(struct raid_softc *sc) {
363 cv_destroy(&sc->sc_cv);
364 mutex_destroy(&sc->sc_mutex);
365 bufq_free(sc->buf_queue);
366 kmem_free(sc, sizeof(*sc));
367 }
368
369 static struct raid_softc *
370 raidget(int unit, bool create) {
371 struct raid_softc *sc;
372 if (unit < 0) {
373 #ifdef DIAGNOSTIC
374 panic("%s: unit %d!", __func__, unit);
375 #endif
376 return NULL;
377 }
378 mutex_enter(&raid_lock);
379 LIST_FOREACH(sc, &raids, sc_link) {
380 if (sc->sc_unit == unit) {
381 mutex_exit(&raid_lock);
382 return sc;
383 }
384 }
385 mutex_exit(&raid_lock);
386 if (!create)
387 return NULL;
388 if ((sc = raidcreate(unit)) == NULL)
389 return NULL;
390 mutex_enter(&raid_lock);
391 LIST_INSERT_HEAD(&raids, sc, sc_link);
392 mutex_exit(&raid_lock);
393 return sc;
394 }
395
396 static void
397 raidput(struct raid_softc *sc) {
398 mutex_enter(&raid_lock);
399 LIST_REMOVE(sc, sc_link);
400 mutex_exit(&raid_lock);
401 raiddestroy(sc);
402 }
403
void
raidattach(int num)
{

	/*
	 * Intentionally empty: device attachment and the associated
	 * initialization are performed by the module initialization
	 * code instead of this legacy pseudo-device hook.
	 */
}
413
414 int
415 rf_autoconfig(device_t self)
416 {
417 RF_AutoConfig_t *ac_list;
418 RF_ConfigSet_t *config_sets;
419
420 if (!raidautoconfig || raidautoconfigdone == true)
421 return (0);
422
423 /* XXX This code can only be run once. */
424 raidautoconfigdone = true;
425
426 #ifdef __HAVE_CPU_BOOTCONF
427 /*
428 * 0. find the boot device if needed first so we can use it later
429 * this needs to be done before we autoconfigure any raid sets,
430 * because if we use wedges we are not going to be able to open
431 * the boot device later
432 */
433 if (booted_device == NULL)
434 cpu_bootconf();
435 #endif
436 /* 1. locate all RAID components on the system */
437 aprint_debug("Searching for RAID components...\n");
438 ac_list = rf_find_raid_components();
439
440 /* 2. Sort them into their respective sets. */
441 config_sets = rf_create_auto_sets(ac_list);
442
443 /*
444 * 3. Evaluate each set and configure the valid ones.
445 * This gets done in rf_buildroothack().
446 */
447 rf_buildroothack(config_sets);
448
449 return 1;
450 }
451
452 static int
453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
454 const char *bootname = device_xname(bdv);
455 size_t len = strlen(bootname);
456
457 for (int col = 0; col < r->numCol; col++) {
458 const char *devname = r->Disks[col].devname;
459 devname += sizeof("/dev/") - 1;
460 if (strncmp(devname, "dk", 2) == 0) {
461 const char *parent =
462 dkwedge_get_parent_name(r->Disks[col].dev);
463 if (parent != NULL)
464 devname = parent;
465 }
466 if (strncmp(devname, bootname, len) == 0) {
467 struct raid_softc *sc = r->softc;
468 aprint_debug("raid%d includes boot device %s\n",
469 sc->sc_unit, devname);
470 return 1;
471 }
472 }
473 return 0;
474 }
475
/*
 * Evaluate the assembled configuration sets, autoconfigure the
 * eligible ones, and -- unless the user hardwired a root device --
 * point booted_device at the RAID set that should provide root.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	/*
	 * Pass over every set: configure the complete, autoconfigurable
	 * ones and count how many claim to be bootable; release the
	 * resources of everything else.
	 */
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		/* Exactly one candidate: use it (or its 'a' wedge). */
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/*
		 * Several sets claim root: narrow the choice to those
		 * that actually contain the device we booted from.
		 */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
586
587 static int
588 raidsize(dev_t dev)
589 {
590 struct raid_softc *rs;
591 struct disklabel *lp;
592 int part, unit, omask, size;
593
594 unit = raidunit(dev);
595 if ((rs = raidget(unit, false)) == NULL)
596 return -1;
597 if ((rs->sc_flags & RAIDF_INITED) == 0)
598 return (-1);
599
600 part = DISKPART(dev);
601 omask = rs->sc_dkdev.dk_openmask & (1 << part);
602 lp = rs->sc_dkdev.dk_label;
603
604 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
605 return (-1);
606
607 if (lp->d_partitions[part].p_fstype != FS_SWAP)
608 size = -1;
609 else
610 size = lp->d_partitions[part].p_size *
611 (lp->d_secsize / DEV_BSIZE);
612
613 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
614 return (-1);
615
616 return (size);
617
618 }
619
/*
 * d_dump entry point: write a kernel crash dump through the RAID set.
 * Only RAID 1 sets (1 data + 1 parity column) are supported.  A live
 * component is chosen (master first, then its spare, then the slave,
 * then the slave's spare) and the dump is forwarded to that
 * component's underlying block device.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* The dump must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run past the end of the device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
755
/* ARGSUSED */
/*
 * Open entry point (block and character).  Creates the softc on first
 * reference, reads the disklabel on the first open of an initialized
 * unit, verifies the partition exists, and records the open in the
 * per-format open masks so the unit cannot be unconfigured while open.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	/* create = true: opening allocates the unit if need be */
	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens while the unit is being shut down */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured, wedge-less unit: read the label */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
841
/* ARGSUSED */
/*
 * Close entry point.  Clears the per-format open bit; on last close of
 * an initialized unit the component labels are marked clean, and if a
 * shutdown was requested the unit is detached.  Note the detach path
 * either drops the unit lock before config_detach() or destroys the
 * softc (and its lock) via raidput().
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);
	}
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
		/*
		 * Detach this raid unit
		 */
		cfdata_t cf = NULL;
		int retcode = 0;

		if (rs->sc_dev != NULL) {
			cf = device_cfdata(rs->sc_dev);

			/* unlock before detaching the device */
			raidunlock(rs);
			retcode = config_detach(rs->sc_dev, DETACH_QUIET);
			if (retcode == 0)
				/* free the pseudo device attach bits */
				free(cf, M_RAIDFRAME);
		} else {
			/* no attached device: just destroy the softc */
			raidput(rs);
		}
		return retcode;
	}

	raidunlock(rs);
	return (0);
}
907
/*
 * Block I/O entry point.  Validates the buffer, bounds-checks it
 * against the disklabel (or against the whole device for RAW_PART),
 * then queues it for the RAIDframe I/O thread and wakes that thread.
 * Failed requests are completed here with bp->b_error set.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* convert totalSectors to DEV_BSIZE units either way */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	/* iodone_lock serializes the queue with the I/O thread */
	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
978
979 /* ARGSUSED */
980 static int
981 raidread(dev_t dev, struct uio *uio, int flags)
982 {
983 int unit = raidunit(dev);
984 struct raid_softc *rs;
985
986 if ((rs = raidget(unit, false)) == NULL)
987 return ENXIO;
988
989 if ((rs->sc_flags & RAIDF_INITED) == 0)
990 return (ENXIO);
991
992 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
993
994 }
995
996 /* ARGSUSED */
997 static int
998 raidwrite(dev_t dev, struct uio *uio, int flags)
999 {
1000 int unit = raidunit(dev);
1001 struct raid_softc *rs;
1002
1003 if ((rs = raidget(unit, false)) == NULL)
1004 return ENXIO;
1005
1006 if ((rs->sc_flags & RAIDF_INITED) == 0)
1007 return (ENXIO);
1008
1009 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1010
1011 }
1012
/*
 * Final stage of detaching a RAID unit.  Called with the unit lock
 * held.  On success this releases the lock and frees the softc via
 * raidput() -- the caller must not touch 'rs' afterwards.  On failure
 * (EBUSY / shutdown error) the lock is left held for the caller.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	/* Free the softc */
	aprint_normal_dev(rs->sc_dev, "detached\n");
	raidunlock(rs);
	raidput(rs);

	return 0;
}
1047
/*
 * raidioctl: ioctl entry point for the raid device.
 *
 * Handles both RAIDframe-specific commands (RAIDFRAME_*) and the
 * standard disk ioctls (DIOC*).  Commands fall through to the generic
 * disk_ioctl()/disklabel handling at the bottom if not consumed by the
 * RAIDframe switch.
 *
 * dev   - the raid device node being operated on
 * cmd   - the ioctl command
 * data  - in/out argument buffer (already copied in by sys_ioctl for
 *         by-value arguments; pointer-valued arguments still require
 *         explicit copyin/copyout here)
 * flag  - open flags of the file descriptor (FREAD/FWRITE)
 * l     - the calling lwp
 *
 * Returns 0 on success or an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	/* Look up the softc for this unit; do not create one here. */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPARTINFO:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* rf_config50() leaves a native RF_Config_t in k_cfg. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * Refuse if any partition other than the one the ioctl
		 * arrived on is still open, or if this partition is open
		 * both block and character.
		 */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* config_detach() eventually calls raid_detach_unlocked() */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		/* Copy the user's label in only to learn which column
		 * they are asking about; the buffer is freed below. */
		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		/* Points into the in-core label cache; not freed here. */
		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in a kernel thread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): not implemented -- returns the initial
		 * retcode (0) without removing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the target column's state under the
		 * array mutex before kicking off the rebuild thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The request outlives this ioctl; the thread frees it. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] just past the data columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		/* NOTE(review): totalSectors is truncated into an int
		 * here -- inaccurate for very large arrays. */
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		 * here (other cases return it) -- confirm intentional. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (cf. SEND_SPARET below); harmless only
		 * because this block is compiled out under #if 0. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue under splbio, migrating any
		 * pending buffers, then free the old queue. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1966
1967
1968 /* raidinit -- complete the rest of the initialization for the
1969 RAIDframe device. */
1970
1971
1972 static void
1973 raidinit(struct raid_softc *rs)
1974 {
1975 cfdata_t cf;
1976 int unit;
1977 RF_Raid_t *raidPtr = &rs->sc_r;
1978
1979 unit = raidPtr->raidid;
1980
1981
1982 /* XXX should check return code first... */
1983 rs->sc_flags |= RAIDF_INITED;
1984
1985 /* XXX doesn't check bounds. */
1986 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1987
1988 /* attach the pseudo device */
1989 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1990 cf->cf_name = raid_cd.cd_name;
1991 cf->cf_atname = raid_cd.cd_name;
1992 cf->cf_unit = unit;
1993 cf->cf_fstate = FSTATE_STAR;
1994
1995 rs->sc_dev = config_attach_pseudo(cf);
1996
1997 if (rs->sc_dev == NULL) {
1998 printf("raid%d: config_attach_pseudo failed\n",
1999 raidPtr->raidid);
2000 rs->sc_flags &= ~RAIDF_INITED;
2001 free(cf, M_RAIDFRAME);
2002 return;
2003 }
2004
2005 /* disk_attach actually creates space for the CPU disklabel, among
2006 * other things, so it's critical to call this *BEFORE* we try putzing
2007 * with disklabels. */
2008
2009 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2010 disk_attach(&rs->sc_dkdev);
2011
2012 /* XXX There may be a weird interaction here between this, and
2013 * protectedSectors, as used in RAIDframe. */
2014
2015 rs->sc_size = raidPtr->totalSectors;
2016
2017 rf_set_geometry(rs, raidPtr);
2018
2019 dkwedge_discover(&rs->sc_dkdev);
2020
2021 }
2022 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2023 /* wake up the daemon & tell it to get us a spare table
2024 * XXX
2025 * the entries in the queues should be tagged with the raidPtr
2026 * so that in the extremely rare case that two recons happen at once,
2027 * we know for which device were requesting a spare table
2028 * XXX
2029 *
2030 * XXX This code is not currently used. GO
2031 */
2032 int
2033 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2034 {
2035 int retcode;
2036
2037 rf_lock_mutex2(rf_sparet_wait_mutex);
2038 req->next = rf_sparet_wait_queue;
2039 rf_sparet_wait_queue = req;
2040 rf_broadcast_cond2(rf_sparet_wait_cv);
2041
2042 /* mpsleep unlocks the mutex */
2043 while (!rf_sparet_resp_queue) {
2044 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2045 }
2046 req = rf_sparet_resp_queue;
2047 rf_sparet_resp_queue = req->next;
2048 rf_unlock_mutex2(rf_sparet_wait_mutex);
2049
2050 retcode = req->fcol;
2051 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2052 * alloc'd */
2053 return (retcode);
2054 }
2055 #endif
2056
2057 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2058 * bp & passes it down.
2059 * any calls originating in the kernel must use non-blocking I/O
2060 * do some extra sanity checking to return "appropriate" error values for
2061 * certain conditions (to make some standard utilities work)
2062 *
2063 * Formerly known as: rf_DoAccessKernel
2064 */
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 *
 * Lock protocol: raidPtr->mutex is held on entry to each loop
 * iteration (guarding raidPtr->openings) and is dropped while the
 * buffer is pulled off the queue, validated, and dispatched; every
 * early-exit path (bad range, misaligned count) re-takes the mutex
 * before continuing the loop.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Labels are updated with the mutex dropped, then the
		 * counter is decremented under the re-taken mutex. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Range check: the (sum < ...) comparisons also catch
		 * unsigned wraparound of raid_addr + num_blocks + pb. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the
		 * sector size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this I/O. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* Dispatch failed: complete the buffer with the
			 * error and keep draining the queue. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2182
2183
2184
2185
2186 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2187
2188 int
2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2190 {
2191 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2192 struct buf *bp;
2193
2194 req->queue = queue;
2195 bp = req->bp;
2196
2197 switch (req->type) {
2198 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2199 /* XXX need to do something extra here.. */
2200 /* I'm leaving this in, as I've never actually seen it used,
2201 * and I'd like folks to report it... GO */
2202 printf(("WAKEUP CALLED\n"));
2203 queue->numOutstanding++;
2204
2205 bp->b_flags = 0;
2206 bp->b_private = req;
2207
2208 KernelWakeupFunc(bp);
2209 break;
2210
2211 case RF_IO_TYPE_READ:
2212 case RF_IO_TYPE_WRITE:
2213 #if RF_ACC_TRACE > 0
2214 if (req->tracerec) {
2215 RF_ETIMER_START(req->tracerec->timer);
2216 }
2217 #endif
2218 InitBP(bp, queue->rf_cinfo->ci_vp,
2219 op, queue->rf_cinfo->ci_dev,
2220 req->sectorOffset, req->numSector,
2221 req->buf, KernelWakeupFunc, (void *) req,
2222 queue->raidPtr->logBytesPerSector, req->b_proc);
2223
2224 if (rf_debugKernelAccess) {
2225 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2226 (long) bp->b_blkno));
2227 }
2228 queue->numOutstanding++;
2229 queue->last_deq_sector = req->sectorOffset;
2230 /* acc wouldn't have been let in if there were any pending
2231 * reqs at any other priority */
2232 queue->curPriority = req->priority;
2233
2234 db1_printf(("Going for %c to unit %d col %d\n",
2235 req->type, queue->raidPtr->raidid,
2236 queue->col));
2237 db1_printf(("sector %d count %d (%d bytes) %d\n",
2238 (int) req->sectorOffset, (int) req->numSector,
2239 (int) (req->numSector <<
2240 queue->raidPtr->logBytesPerSector),
2241 (int) queue->raidPtr->logBytesPerSector));
2242
2243 /*
2244 * XXX: drop lock here since this can block at
2245 * least with backing SCSI devices. Retake it
2246 * to minimize fuss with calling interfaces.
2247 */
2248
2249 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2250 bdev_strategy(bp);
2251 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2252 break;
2253
2254 default:
2255 panic("bad req->type in rf_DispatchKernelIO");
2256 }
2257 db1_printf(("Exiting from DispatchKernelIO\n"));
2258
2259 return (0);
2260 }
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
/*
 * I/O completion callback (installed as b_iodone by InitBP()) for
 * component I/O issued by rf_DispatchKernelIO().  On error the failing
 * component is marked failed -- but only once, and only if doing so
 * would not exceed what the layout's fault tolerance allows.  The
 * request is then placed on raidPtr->iodone and the raidio thread is
 * signalled via iodone_cv to finish processing it.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the original request was stashed in b_private at dispatch time */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2330
2331
2332 /*
2333 * initialize a buf structure for doing an I/O in the kernel.
2334 */
2335 static void
2336 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2337 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2338 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2339 struct proc *b_proc)
2340 {
2341 /* bp->b_flags = B_PHYS | rw_flag; */
2342 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2343 bp->b_oflags = 0;
2344 bp->b_cflags = 0;
2345 bp->b_bcount = numSect << logBytesPerSector;
2346 bp->b_bufsize = bp->b_bcount;
2347 bp->b_error = 0;
2348 bp->b_dev = dev;
2349 bp->b_data = bf;
2350 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2351 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2352 if (bp->b_bcount == 0) {
2353 panic("bp->b_bcount is zero in InitBP!!");
2354 }
2355 bp->b_proc = b_proc;
2356 bp->b_iodone = cbFunc;
2357 bp->b_private = cbArg;
2358 }
2359
2360 static void
2361 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2362 struct disklabel *lp)
2363 {
2364 memset(lp, 0, sizeof(*lp));
2365
2366 /* fabricate a label... */
2367 if (raidPtr->totalSectors > UINT32_MAX)
2368 lp->d_secperunit = UINT32_MAX;
2369 else
2370 lp->d_secperunit = raidPtr->totalSectors;
2371 lp->d_secsize = raidPtr->bytesPerSector;
2372 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2373 lp->d_ntracks = 4 * raidPtr->numCol;
2374 lp->d_ncylinders = raidPtr->totalSectors /
2375 (lp->d_nsectors * lp->d_ntracks);
2376 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2377
2378 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2379 lp->d_type = DKTYPE_RAID;
2380 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2381 lp->d_rpm = 3600;
2382 lp->d_interleave = 1;
2383 lp->d_flags = 0;
2384
2385 lp->d_partitions[RAW_PART].p_offset = 0;
2386 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2387 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2388 lp->d_npartitions = RAW_PART + 1;
2389
2390 lp->d_magic = DISKMAGIC;
2391 lp->d_magic2 = DISKMAGIC;
2392 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2393
2394 }
2395 /*
2396 * Read the disklabel from the raid device. If one is not present, fake one
2397 * up.
2398 */
/*
 * Read the disklabel from the raid device "dev".  A default label is
 * fabricated first; if readdisklabel() finds a real one it replaces the
 * default, otherwise raidmakedisklabel() finishes the fake one.  A found
 * label is sanity-checked against the current size of the set, since a
 * stale label can survive an interleave change.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	/* no softc for this unit -> nothing to label */
	if ((rs = raidget(unit, false)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* start from a fabricated default; readdisklabel() may override */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX, so only require
		 * "<=" in that case; otherwise demand an exact match. */
		if (lp->d_secperunit < UINT32_MAX ?
		    lp->d_secperunit != rs->sc_size :
		    lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ju)\n",
				    unit, rs->sc_xname, 'a' + i,
				    (uintmax_t)rs->sc_size);
		}
	}

}
2461 /*
2462 * Take care of things one might want to take care of in the event
2463 * that a disklabel isn't present.
2464 */
2465 static void
2466 raidmakedisklabel(struct raid_softc *rs)
2467 {
2468 struct disklabel *lp = rs->sc_dkdev.dk_label;
2469 db1_printf(("Making a label..\n"));
2470
2471 /*
2472 * For historical reasons, if there's no disklabel present
2473 * the raw partition must be marked FS_BSDFFS.
2474 */
2475
2476 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2477
2478 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2479
2480 lp->d_checksum = dkcksum(lp);
2481 }
2482 /*
2483 * Wait interruptibly for an exclusive lock.
2484 *
2485 * XXX
2486 * Several drivers do this; it should be abstracted and made MP-safe.
2487 * (Hmm... where have we seen this warning before :-> GO )
2488 */
2489 static int
2490 raidlock(struct raid_softc *rs)
2491 {
2492 int error;
2493
2494 mutex_enter(&rs->sc_mutex);
2495 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2496 rs->sc_flags |= RAIDF_WANTED;
2497 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2498 if (error != 0)
2499 return (error);
2500 }
2501 rs->sc_flags |= RAIDF_LOCKED;
2502 mutex_exit(&rs->sc_mutex);
2503 return (0);
2504 }
2505 /*
2506 * Unlock and wake up any waiters.
2507 */
2508 static void
2509 raidunlock(struct raid_softc *rs)
2510 {
2511
2512 mutex_enter(&rs->sc_mutex);
2513 rs->sc_flags &= ~RAIDF_LOCKED;
2514 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2515 rs->sc_flags &= ~RAIDF_WANTED;
2516 cv_broadcast(&rs->sc_cv);
2517 }
2518 mutex_exit(&rs->sc_mutex);
2519 }
2520
2521
2522 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2523 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2524 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2525
2526 static daddr_t
2527 rf_component_info_offset(void)
2528 {
2529
2530 return RF_COMPONENT_INFO_OFFSET;
2531 }
2532
2533 static daddr_t
2534 rf_component_info_size(unsigned secsize)
2535 {
2536 daddr_t info_size;
2537
2538 KASSERT(secsize);
2539 if (secsize > RF_COMPONENT_INFO_SIZE)
2540 info_size = secsize;
2541 else
2542 info_size = RF_COMPONENT_INFO_SIZE;
2543
2544 return info_size;
2545 }
2546
2547 static daddr_t
2548 rf_parity_map_offset(RF_Raid_t *raidPtr)
2549 {
2550 daddr_t map_offset;
2551
2552 KASSERT(raidPtr->bytesPerSector);
2553 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2554 map_offset = raidPtr->bytesPerSector;
2555 else
2556 map_offset = RF_COMPONENT_INFO_SIZE;
2557 map_offset += rf_component_info_offset();
2558
2559 return map_offset;
2560 }
2561
2562 static daddr_t
2563 rf_parity_map_size(RF_Raid_t *raidPtr)
2564 {
2565 daddr_t map_size;
2566
2567 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2568 map_size = raidPtr->bytesPerSector;
2569 else
2570 map_size = RF_PARITY_MAP_SIZE;
2571
2572 return map_size;
2573 }
2574
2575 int
2576 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2577 {
2578 RF_ComponentLabel_t *clabel;
2579
2580 clabel = raidget_component_label(raidPtr, col);
2581 clabel->clean = RF_RAID_CLEAN;
2582 raidflush_component_label(raidPtr, col);
2583 return(0);
2584 }
2585
2586
2587 int
2588 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2589 {
2590 RF_ComponentLabel_t *clabel;
2591
2592 clabel = raidget_component_label(raidPtr, col);
2593 clabel->clean = RF_RAID_DIRTY;
2594 raidflush_component_label(raidPtr, col);
2595 return(0);
2596 }
2597
2598 int
2599 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2600 {
2601 KASSERT(raidPtr->bytesPerSector);
2602 return raidread_component_label(raidPtr->bytesPerSector,
2603 raidPtr->Disks[col].dev,
2604 raidPtr->raid_cinfo[col].ci_vp,
2605 &raidPtr->raid_cinfo[col].ci_label);
2606 }
2607
2608 RF_ComponentLabel_t *
2609 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2610 {
2611 return &raidPtr->raid_cinfo[col].ci_label;
2612 }
2613
2614 int
2615 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2616 {
2617 RF_ComponentLabel_t *label;
2618
2619 label = &raidPtr->raid_cinfo[col].ci_label;
2620 label->mod_counter = raidPtr->mod_counter;
2621 #ifndef RF_NO_PARITY_MAP
2622 label->parity_map_modcount = label->mod_counter;
2623 #endif
2624 return raidwrite_component_label(raidPtr->bytesPerSector,
2625 raidPtr->Disks[col].dev,
2626 raidPtr->raid_cinfo[col].ci_vp, label);
2627 }
2628
2629
2630 static int
2631 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2632 RF_ComponentLabel_t *clabel)
2633 {
2634 return raidread_component_area(dev, b_vp, clabel,
2635 sizeof(RF_ComponentLabel_t),
2636 rf_component_info_offset(),
2637 rf_component_info_size(secsize));
2638 }
2639
/*
 * Synchronously read "dsize" bytes starting at byte "offset" of the raw
 * component device, copying the first "msize" bytes into "data" on
 * success.  Returns 0 or the error from biowait().  b_vp is only used
 * as a validity check for the component.
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* issue the read and wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	/* release the scratch buffer whether or not the read succeeded */
	brelse(bp, 0);
	return(error);
}
2677
2678
2679 static int
2680 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2681 RF_ComponentLabel_t *clabel)
2682 {
2683 return raidwrite_component_area(dev, b_vp, clabel,
2684 sizeof(RF_ComponentLabel_t),
2685 rf_component_info_offset(),
2686 rf_component_info_size(secsize), 0);
2687 }
2688
2689 /* ARGSUSED */
2690 static int
2691 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2692 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2693 {
2694 struct buf *bp;
2695 int error;
2696
2697 /* get a block of the appropriate size... */
2698 bp = geteblk((int)dsize);
2699 bp->b_dev = dev;
2700
2701 /* get our ducks in a row for the write */
2702 bp->b_blkno = offset / DEV_BSIZE;
2703 bp->b_bcount = dsize;
2704 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2705 bp->b_resid = dsize;
2706
2707 memset(bp->b_data, 0, dsize);
2708 memcpy(bp->b_data, data, msize);
2709
2710 bdev_strategy(bp);
2711 if (asyncp)
2712 return 0;
2713 error = biowait(bp);
2714 brelse(bp, 0);
2715 if (error) {
2716 #if 1
2717 printf("Failed to write RAID component info!\n");
2718 #endif
2719 }
2720
2721 return(error);
2722 }
2723
2724 void
2725 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2726 {
2727 int c;
2728
2729 for (c = 0; c < raidPtr->numCol; c++) {
2730 /* Skip dead disks. */
2731 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2732 continue;
2733 /* XXXjld: what if an error occurs here? */
2734 raidwrite_component_area(raidPtr->Disks[c].dev,
2735 raidPtr->raid_cinfo[c].ci_vp, map,
2736 RF_PARITYMAP_NBYTE,
2737 rf_parity_map_offset(raidPtr),
2738 rf_parity_map_size(raidPtr), 0);
2739 }
2740 }
2741
2742 void
2743 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2744 {
2745 struct rf_paritymap_ondisk tmp;
2746 int c,first;
2747
2748 first=1;
2749 for (c = 0; c < raidPtr->numCol; c++) {
2750 /* Skip dead disks. */
2751 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2752 continue;
2753 raidread_component_area(raidPtr->Disks[c].dev,
2754 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2755 RF_PARITYMAP_NBYTE,
2756 rf_parity_map_offset(raidPtr),
2757 rf_parity_map_size(raidPtr));
2758 if (first) {
2759 memcpy(map, &tmp, sizeof(*map));
2760 first = 0;
2761 } else {
2762 rf_paritymap_merge(map, &tmp);
2763 }
2764 }
2765 }
2766
/*
 * Bump the set's modification counter and mark the component labels of
 * all live components dirty.  Used-spare components get their labels
 * (re)initialized so they record the column they stand in for, then are
 * marked dirty as well.  Failed components are never touched.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2826
2827
/*
 * Re-stamp and flush the component labels of all optimal components
 * and all in-use spares.  When final == RF_FINAL_COMPONENT_UPDATE and
 * parity is known good, components are also marked clean (the normal
 * shutdown/unconfigure path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2902
2903 void
2904 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2905 {
2906
2907 if (vp != NULL) {
2908 if (auto_configured == 1) {
2909 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2910 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2911 vput(vp);
2912
2913 } else {
2914 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2915 }
2916 }
2917 }
2918
2919
2920 void
2921 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2922 {
2923 int r,c;
2924 struct vnode *vp;
2925 int acd;
2926
2927
2928 /* We take this opportunity to close the vnodes like we should.. */
2929
2930 for (c = 0; c < raidPtr->numCol; c++) {
2931 vp = raidPtr->raid_cinfo[c].ci_vp;
2932 acd = raidPtr->Disks[c].auto_configured;
2933 rf_close_component(raidPtr, vp, acd);
2934 raidPtr->raid_cinfo[c].ci_vp = NULL;
2935 raidPtr->Disks[c].auto_configured = 0;
2936 }
2937
2938 for (r = 0; r < raidPtr->numSpare; r++) {
2939 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2940 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2941 rf_close_component(raidPtr, vp, acd);
2942 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2943 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2944 }
2945 }
2946
2947
/*
 * Kernel-thread entry point for reconstruction: fail the requested
 * column (optionally kicking off reconstruction, per RF_FDFLAGS_RECON),
 * free the request, and exit.  recon_in_progress brackets the work so
 * other paths can tell a reconstruction is running.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our creator; we own and free it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2969
/*
 * Kernel-thread entry point for parity rewrite.  On success the set is
 * flagged parity-clean so the component labels get the clean bit at
 * shutdown.  Anyone blocked in shutdown waiting for the rewrite is
 * woken before the thread exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3000
3001
3002 void
3003 rf_CopybackThread(RF_Raid_t *raidPtr)
3004 {
3005 int s;
3006
3007 raidPtr->copyback_in_progress = 1;
3008 s = splbio();
3009 rf_CopybackReconstructedData(raidPtr);
3010 splx(s);
3011 raidPtr->copyback_in_progress = 0;
3012
3013 /* That's all... */
3014 kthread_exit(0); /* does not return */
3015 }
3016
3017
/*
 * Kernel-thread entry point for in-place reconstruction: rebuild the
 * requested column onto itself (e.g. after a component was replaced in
 * the same slot), free the request, and exit.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* request was allocated by our creator; we own and free it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3035
/*
 * Probe one candidate component (dev/vp) during autoconfiguration.  If
 * a plausible component label is found, an RF_AutoConfig_t is prepended
 * to ac_list and the (still open) vnode's ownership passes to the list;
 * otherwise the vnode is closed and released here.  On allocation
 * failure the entire ac_list is torn down and NULL is returned.
 * Returns the (possibly updated) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free every entry accumulated so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* vnode ownership moves to the list */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3093
/*
 * Scan every disk-class device in the system for RAIDframe components.
 * For each candidate the raw device (or wedge) is opened and probed via
 * rf_get_component(): wedges are matched on DKW_PTYPE_RAIDFRAME, labeled
 * disks on FS_RAID partitions, and as a fallback the raw partition
 * itself is probed.  Returns a linked list of RF_AutoConfig_t entries
 * (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their own partition type; probe the
			   whole wedge if it is marked as RAIDframe */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
			    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3275
3276
3277 int
3278 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3279 {
3280
3281 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3282 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3283 ((clabel->clean == RF_RAID_CLEAN) ||
3284 (clabel->clean == RF_RAID_DIRTY)) &&
3285 clabel->row >=0 &&
3286 clabel->column >= 0 &&
3287 clabel->num_rows > 0 &&
3288 clabel->num_columns > 0 &&
3289 clabel->row < clabel->num_rows &&
3290 clabel->column < clabel->num_columns &&
3291 clabel->blockSize > 0 &&
3292 /*
3293 * numBlocksHi may contain garbage, but it is ok since
3294 * the type is unsigned. If it is really garbage,
3295 * rf_fix_old_label_size() will fix it.
3296 */
3297 rf_component_label_numblocks(clabel) > 0) {
3298 /*
3299 * label looks reasonable enough...
3300 * let's make sure it has no old garbage.
3301 */
3302 if (numsecs)
3303 rf_fix_old_label_size(clabel, numsecs);
3304 return(1);
3305 }
3306 return(0);
3307 }
3308
3309
3310 /*
3311 * For reasons yet unknown, some old component labels have garbage in
3312 * the newer numBlocksHi region, and this causes lossage. Since those
3313 * disks will also have numsecs set to less than 32 bits of sectors,
3314 * we can determine when this corruption has occurred, and fix it.
3315 *
3316 * The exact same problem, with the same unknown reason, happens to
3317 * the partitionSizeHi member as well.
3318 */
3319 static void
3320 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3321 {
3322
3323 if (numsecs < ((uint64_t)1 << 32)) {
3324 if (clabel->numBlocksHi) {
3325 printf("WARNING: total sectors < 32 bits, yet "
3326 "numBlocksHi set\n"
3327 "WARNING: resetting numBlocksHi to zero.\n");
3328 clabel->numBlocksHi = 0;
3329 }
3330
3331 if (clabel->partitionSizeHi) {
3332 printf("WARNING: total sectors < 32 bits, yet "
3333 "partitionSizeHi set\n"
3334 "WARNING: resetting partitionSizeHi to zero.\n");
3335 clabel->partitionSizeHi = 0;
3336 }
3337 }
3338 }
3339
3340
3341 #ifdef DEBUG
/*
 * Dump the contents of a component label to the console (debug builds
 * only).  Read-only; purely informational.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* index matches the root_partition field: 0=No, 1=Force, 2=Soft */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3373 #endif
3374
3375 RF_ConfigSet_t *
3376 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3377 {
3378 RF_AutoConfig_t *ac;
3379 RF_ConfigSet_t *config_sets;
3380 RF_ConfigSet_t *cset;
3381 RF_AutoConfig_t *ac_next;
3382
3383
3384 config_sets = NULL;
3385
3386 /* Go through the AutoConfig list, and figure out which components
3387 belong to what sets. */
3388 ac = ac_list;
3389 while(ac!=NULL) {
3390 /* we're going to putz with ac->next, so save it here
3391 for use at the end of the loop */
3392 ac_next = ac->next;
3393
3394 if (config_sets == NULL) {
3395 /* will need at least this one... */
3396 config_sets = (RF_ConfigSet_t *)
3397 malloc(sizeof(RF_ConfigSet_t),
3398 M_RAIDFRAME, M_NOWAIT);
3399 if (config_sets == NULL) {
3400 panic("rf_create_auto_sets: No memory!");
3401 }
3402 /* this one is easy :) */
3403 config_sets->ac = ac;
3404 config_sets->next = NULL;
3405 config_sets->rootable = 0;
3406 ac->next = NULL;
3407 } else {
3408 /* which set does this component fit into? */
3409 cset = config_sets;
3410 while(cset!=NULL) {
3411 if (rf_does_it_fit(cset, ac)) {
3412 /* looks like it matches... */
3413 ac->next = cset->ac;
3414 cset->ac = ac;
3415 break;
3416 }
3417 cset = cset->next;
3418 }
3419 if (cset==NULL) {
3420 /* didn't find a match above... new set..*/
3421 cset = (RF_ConfigSet_t *)
3422 malloc(sizeof(RF_ConfigSet_t),
3423 M_RAIDFRAME, M_NOWAIT);
3424 if (cset == NULL) {
3425 panic("rf_create_auto_sets: No memory!");
3426 }
3427 cset->ac = ac;
3428 ac->next = NULL;
3429 cset->next = config_sets;
3430 cset->rootable = 0;
3431 config_sets = cset;
3432 }
3433 }
3434 ac = ac_next;
3435 }
3436
3437
3438 return(config_sets);
3439 }
3440
3441 static int
3442 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3443 {
3444 RF_ComponentLabel_t *clabel1, *clabel2;
3445
3446 /* If this one matches the *first* one in the set, that's good
3447 enough, since the other members of the set would have been
3448 through here too... */
3449 /* note that we are not checking partitionSize here..
3450
3451 Note that we are also not checking the mod_counters here.
3452 If everything else matches except the mod_counter, that's
3453 good enough for this test. We will deal with the mod_counters
3454 a little later in the autoconfiguration process.
3455
3456 (clabel1->mod_counter == clabel2->mod_counter) &&
3457
3458 The reason we don't check for this is that failed disks
3459 will have lower modification counts. If those disks are
3460 not added to the set they used to belong to, then they will
3461 form their own set, which may result in 2 different sets,
3462 for example, competing to be configured at raid0, and
3463 perhaps competing to be the root filesystem set. If the
3464 wrong ones get configured, or both attempt to become /,
3465 weird behaviour and or serious lossage will occur. Thus we
3466 need to bring them into the fold here, and kick them out at
3467 a later point.
3468
3469 */
3470
3471 clabel1 = cset->ac->clabel;
3472 clabel2 = ac->clabel;
3473 if ((clabel1->version == clabel2->version) &&
3474 (clabel1->serial_number == clabel2->serial_number) &&
3475 (clabel1->num_rows == clabel2->num_rows) &&
3476 (clabel1->num_columns == clabel2->num_columns) &&
3477 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3478 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3479 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3480 (clabel1->parityConfig == clabel2->parityConfig) &&
3481 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3482 (clabel1->blockSize == clabel2->blockSize) &&
3483 rf_component_label_numblocks(clabel1) ==
3484 rf_component_label_numblocks(clabel2) &&
3485 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3486 (clabel1->root_partition == clabel2->root_partition) &&
3487 (clabel1->last_unit == clabel2->last_unit) &&
3488 (clabel1->config_order == clabel2->config_order)) {
3489 /* if it get's here, it almost *has* to be a match */
3490 } else {
3491 /* it's not consistent with somebody in the set..
3492 punt */
3493 return(0);
3494 }
3495 /* all was fine.. it must fit... */
3496 return(1);
3497 }
3498
/*
 * Decide whether this config set has enough up-to-date components to
 * be configured.  Returns 1 if so, 0 if too many components are
 * missing for the set's RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the highest mod_counter present wins; components carrying a
	   lower counter are stale (e.g. disks that failed a while ago)
	   and are not counted as present in the column scan below */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for an up-to-date component at column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* per-level tolerance: RAID 0 can lose nothing, RAID 4/5 at
	   most one component (the RAID 1 pairs were handled above) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3601
3602 void
3603 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3604 RF_Raid_t *raidPtr)
3605 {
3606 RF_ComponentLabel_t *clabel;
3607 int i;
3608
3609 clabel = ac->clabel;
3610
3611 /* 1. Fill in the common stuff */
3612 config->numRow = clabel->num_rows = 1;
3613 config->numCol = clabel->num_columns;
3614 config->numSpare = 0; /* XXX should this be set here? */
3615 config->sectPerSU = clabel->sectPerSU;
3616 config->SUsPerPU = clabel->SUsPerPU;
3617 config->SUsPerRU = clabel->SUsPerRU;
3618 config->parityConfig = clabel->parityConfig;
3619 /* XXX... */
3620 strcpy(config->diskQueueType,"fifo");
3621 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3622 config->layoutSpecificSize = 0; /* XXX ?? */
3623
3624 while(ac!=NULL) {
3625 /* row/col values will be in range due to the checks
3626 in reasonable_label() */
3627 strcpy(config->devnames[0][ac->clabel->column],
3628 ac->devname);
3629 ac = ac->next;
3630 }
3631
3632 for(i=0;i<RF_MAXDBGV;i++) {
3633 config->debugVars[i][0] = 0;
3634 }
3635 }
3636
3637 int
3638 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3639 {
3640 RF_ComponentLabel_t *clabel;
3641 int column;
3642 int sparecol;
3643
3644 raidPtr->autoconfigure = new_value;
3645
3646 for(column=0; column<raidPtr->numCol; column++) {
3647 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3648 clabel = raidget_component_label(raidPtr, column);
3649 clabel->autoconfigure = new_value;
3650 raidflush_component_label(raidPtr, column);
3651 }
3652 }
3653 for(column = 0; column < raidPtr->numSpare ; column++) {
3654 sparecol = raidPtr->numCol + column;
3655 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3656 clabel = raidget_component_label(raidPtr, sparecol);
3657 clabel->autoconfigure = new_value;
3658 raidflush_component_label(raidPtr, sparecol);
3659 }
3660 }
3661 return(new_value);
3662 }
3663
3664 int
3665 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3666 {
3667 RF_ComponentLabel_t *clabel;
3668 int column;
3669 int sparecol;
3670
3671 raidPtr->root_partition = new_value;
3672 for(column=0; column<raidPtr->numCol; column++) {
3673 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3674 clabel = raidget_component_label(raidPtr, column);
3675 clabel->root_partition = new_value;
3676 raidflush_component_label(raidPtr, column);
3677 }
3678 }
3679 for(column = 0; column < raidPtr->numSpare ; column++) {
3680 sparecol = raidPtr->numCol + column;
3681 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3682 clabel = raidget_component_label(raidPtr, sparecol);
3683 clabel->root_partition = new_value;
3684 raidflush_component_label(raidPtr, sparecol);
3685 }
3686 }
3687 return(new_value);
3688 }
3689
3690 void
3691 rf_release_all_vps(RF_ConfigSet_t *cset)
3692 {
3693 RF_AutoConfig_t *ac;
3694
3695 ac = cset->ac;
3696 while(ac!=NULL) {
3697 /* Close the vp, and give it back */
3698 if (ac->vp) {
3699 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3700 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3701 vput(ac->vp);
3702 ac->vp = NULL;
3703 }
3704 ac = ac->next;
3705 }
3706 }
3707
3708
3709 void
3710 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3711 {
3712 RF_AutoConfig_t *ac;
3713 RF_AutoConfig_t *next_ac;
3714
3715 ac = cset->ac;
3716 while(ac!=NULL) {
3717 next_ac = ac->next;
3718 /* nuke the label */
3719 free(ac->clabel, M_RAIDFRAME);
3720 /* cleanup the config structure */
3721 free(ac, M_RAIDFRAME);
3722 /* "next.." */
3723 ac = next_ac;
3724 }
3725 /* and, finally, nuke the config set */
3726 free(cset, M_RAIDFRAME);
3727 }
3728
3729
/*
 * Fill in a component label from the current in-core state of the
 * RAID set: version, serial number, mod counter, geometry, sizes, and
 * the autoconfigure/root settings.  Per-component fields (e.g. the
 * column and partitionSize) are not touched here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3762
/*
 * Configure a RAID set from an autoconfig config set.  Returns the
 * configured raid_softc on success, or NULL on failure (out of
 * memory, or rf_Configure() rejected the configuration).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* scan upward from the label's preferred unit until we find a
	   unit that is absent or not already valid */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* unit did not exist yet: create its softc now */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the softc back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3846
3847 void
3848 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3849 {
3850 struct buf *bp;
3851 struct raid_softc *rs;
3852
3853 bp = (struct buf *)desc->bp;
3854 rs = desc->raidPtr->softc;
3855 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3856 (bp->b_flags & B_READ));
3857 }
3858
/*
 * Convenience wrapper: initialize a pool at IPL_BIO, set its high
 * watermark to xmax, pre-allocate xmin items, and set the low
 * watermark to xmin so the pool stays primed.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3868
3869 /*
3870 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3871 * if there is IO pending and if that IO could possibly be done for a
3872 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3873 * otherwise.
3874 *
3875 */
3876
3877 int
3878 rf_buf_queue_check(RF_Raid_t *raidPtr)
3879 {
3880 struct raid_softc *rs = raidPtr->softc;
3881 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3882 /* there is work to do */
3883 return 0;
3884 }
3885 /* default is nothing to do */
3886 return 1;
3887 }
3888
3889 int
3890 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3891 {
3892 uint64_t numsecs;
3893 unsigned secsize;
3894 int error;
3895
3896 error = getdisksize(vp, &numsecs, &secsize);
3897 if (error == 0) {
3898 diskPtr->blockSize = secsize;
3899 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3900 diskPtr->partitionSize = numsecs;
3901 return 0;
3902 }
3903 return error;
3904 }
3905
/*
 * Autoconf match: always claim the pseudo-device.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3911
/*
 * Autoconf attach: intentionally empty; no per-device setup is done
 * at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3917
3918
/*
 * Autoconf detach: look up the softc for this unit, take the raid
 * lock, and hand off to raid_detach_unlocked().
 *
 * NOTE(review): the lock acquired by raidlock() is not released in
 * this function; presumably raid_detach_unlocked() drops it on all
 * paths -- confirm against its definition.
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidget(device_unit(self), false);

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	return error;
}
3935
/*
 * Publish disk geometry for the raid device from in-core RAID state,
 * then push it to the disk subsystem via disk_set_info().
 * dg_secperunit and dg_secsize are the real figures;
 * NOTE(review): dg_nsectors/dg_ntracks look like synthesized values
 * (ntracks = 4 * numCol has no physical meaning for a RAID set) --
 * confirm what consumers expect here.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3950
3951 /*
3952 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3953 * We end up returning whatever error was returned by the first cache flush
3954 * that fails.
3955 */
3956
3957 int
3958 rf_sync_component_caches(RF_Raid_t *raidPtr)
3959 {
3960 int c, sparecol;
3961 int e,error;
3962 int force = 1;
3963
3964 error = 0;
3965 for (c = 0; c < raidPtr->numCol; c++) {
3966 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3967 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3968 &force, FWRITE, NOCRED);
3969 if (e) {
3970 if (e != ENODEV)
3971 printf("raid%d: cache flush to component %s failed.\n",
3972 raidPtr->raidid, raidPtr->Disks[c].devname);
3973 if (error == 0) {
3974 error = e;
3975 }
3976 }
3977 }
3978 }
3979
3980 for( c = 0; c < raidPtr->numSpare ; c++) {
3981 sparecol = raidPtr->numCol + c;
3982 /* Need to ensure that the reconstruct actually completed! */
3983 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3984 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3985 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3986 if (e) {
3987 if (e != ENODEV)
3988 printf("raid%d: cache flush to component %s failed.\n",
3989 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3990 if (error == 0) {
3991 error = e;
3992 }
3993 }
3994 }
3995 }
3996 return error;
3997 }
3998
3999 /*
4000 * Module interface
4001 */
4002
4003 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
4004
4005 #ifdef _MODULE
4006 CFDRIVER_DECL(raid, DV_DISK, NULL);
4007 #endif
4008
4009 static int raid_modcmd(modcmd_t, void *);
4010 static int raid_modcmd_init(void);
4011 static int raid_modcmd_fini(void);
4012
/*
 * Module command dispatcher: route init/fini to their handlers;
 * reject any other command with ENOTTY.
 */
static int
raid_modcmd(modcmd_t cmd, void *data)
{
	int error;

	error = 0;
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = raid_modcmd_init();
		break;
	case MODULE_CMD_FINI:
		error = raid_modcmd_fini();
		break;
	default:
		error = ENOTTY;
		break;
	}
	return error;
}
4032
/*
 * Module initialization: create the global raid mutex, attach the
 * block/character devsw and the autoconf driver/attach glue, boot the
 * RAIDframe core, and register a config finalizer that will
 * autoconfigure RAID sets once all real hardware has been found.  On
 * failure, everything attached so far is rolled back in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* EEXIST is tolerated here: the devsw may already be present */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw attach */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back the cfdriver and devsw attaches */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here: every failure path above has
	   already returned */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* non-fatal: warn and carry on without the finalizer */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
4103
/*
 * Module finalization: refuse to unload while any raid softc exists;
 * otherwise detach the autoconf glue and devsw, shut down the
 * RAIDframe core, and destroy the global lock.  Each failed detach
 * step re-attaches whatever was already detached before returning,
 * leaving the module in a usable state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		/* roll back: re-attach what we just detached */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		/* roll back both autoconf detaches */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4150