rf_netbsdkintf.c revision 1.333 1 /* $NetBSD: rf_netbsdkintf.c,v 1.333 2016/01/02 16:10:06 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.333 2016/01/02 16:10:06 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 #include <sys/module.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #include "ioconf.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 struct raid_softc;
183 static void raidinit(struct raid_softc *);
184
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 static dev_type_open(raidopen);
201 static dev_type_close(raidclose);
202 static dev_type_read(raidread);
203 static dev_type_write(raidwrite);
204 static dev_type_ioctl(raidioctl);
205 static dev_type_strategy(raidstrategy);
206 static dev_type_dump(raiddump);
207 static dev_type_size(raidsize);
208
/* Block-device switch: entry points for the raid block devices. */
209 const struct bdevsw raid_bdevsw = {
210 .d_open = raidopen,
211 .d_close = raidclose,
212 .d_strategy = raidstrategy,
213 .d_ioctl = raidioctl,
214 .d_dump = raiddump,
215 .d_psize = raidsize,
216 .d_discard = nodiscard,
217 .d_flag = D_DISK
218 };
219
/* Character-device switch: entry points for the raw raid devices. */
220 const struct cdevsw raid_cdevsw = {
221 .d_open = raidopen,
222 .d_close = raidclose,
223 .d_read = raidread,
224 .d_write = raidwrite,
225 .d_ioctl = raidioctl,
226 .d_stop = nostop,
227 .d_tty = notty,
228 .d_poll = nopoll,
229 .d_mmap = nommap,
230 .d_kqfilter = nokqfilter,
231 .d_discard = nodiscard,
232 .d_flag = D_DISK
233 };
234
/* disk(9) driver glue: used when attaching the generic disk structure. */
235 static struct dkdriver rf_dkdriver = {
236 .d_strategy = raidstrategy,
237 .d_minphys = minphys
238 };
239
/*
 * Per-unit software state for a RAID pseudo-device.  Allocated by
 * raidcreate() and kept on the global `raids' list (protected by
 * raid_lock).
 */
240 struct raid_softc {
241 device_t sc_dev;
242 int sc_unit;
243 int sc_flags; /* flags */
244 int sc_cflags; /* configuration flags */
245 kmutex_t sc_mutex; /* interlock mutex */
246 kcondvar_t sc_cv; /* and the condvar */
247 uint64_t sc_size; /* size of the raid device */
248 char sc_xname[20]; /* XXX external name */
249 struct disk sc_dkdev; /* generic disk device info */
250 struct bufq_state *buf_queue; /* used for the device queue */
251 RF_Raid_t sc_r; /* RAIDframe per-array state */
252 LIST_ENTRY(raid_softc) sc_link; /* entry on the global `raids' list */
253 };
254 /* sc_flags */
255 #define RAIDF_INITED 0x01 /* unit has been initialized */
256 #define RAIDF_WLABEL 0x02 /* label area is writable */
257 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
258 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
259 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
260 #define RAIDF_LOCKED 0x80 /* unit is locked */
261
262 #define raidunit(x) DISKUNIT(x)
263
264 extern struct cfdriver raid_cd;
265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
266 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
267 DVF_DETACH_SHUTDOWN);
268
269 /*
270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271 * Be aware that large numbers can allow the driver to consume a lot of
272 * kernel memory, especially on writes, and in degraded mode reads.
273 *
274 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275 * a single 64K write will typically require 64K for the old data,
276 * 64K for the old parity, and 64K for the new parity, for a total
277 * of 192K (if the parity buffer is not re-used immediately).
278 * Even it if is used immediately, that's still 128K, which when multiplied
279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
280 *
281 * Now in degraded mode, for example, a 64K read on the above setup may
282 * require data reconstruction, which will require *all* of the 4 remaining
283 * disks to participate -- 4 * 32K/disk == 128K again.
284 */
285
286 #ifndef RAIDOUTSTANDING
287 #define RAIDOUTSTANDING 6
288 #endif
289
290 #define RAIDLABELDEV(dev) \
291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292
293 /* declared here, and made public, for the benefit of KVM stuff.. */
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static int raid_detach_unlocked(struct raid_softc *);
304
305 static void rf_markalldirty(RF_Raid_t *);
306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
307
308 void rf_ReconThread(struct rf_recon_req *);
309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
310 void rf_CopybackThread(RF_Raid_t *raidPtr);
311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
312 int rf_autoconfig(device_t);
313 void rf_buildroothack(RF_ConfigSet_t *);
314
315 RF_AutoConfig_t *rf_find_raid_components(void);
316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
320 int rf_set_autoconfig(RF_Raid_t *, int);
321 int rf_set_rootpartition(RF_Raid_t *, int);
322 void rf_release_all_vps(RF_ConfigSet_t *);
323 void rf_cleanup_config_set(RF_ConfigSet_t *);
324 int rf_have_enough_components(RF_ConfigSet_t *);
325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
327
328 /*
329 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
330 * Note that this is overridden by having RAID_AUTOCONFIG as an option
331 * in the kernel config file.
332 */
333 #ifdef RAID_AUTOCONFIG
334 int raidautoconfig = 1;
335 #else
336 int raidautoconfig = 0;
337 #endif
338 static bool raidautoconfigdone = false;
339
340 struct RF_Pools_s rf_pools;
341
342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
343 static kmutex_t raid_lock;
344
345 static struct raid_softc *
346 raidcreate(int unit) {
347 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
348 if (sc == NULL) {
349 #ifdef DIAGNOSTIC
350 printf("%s: out of memory\n", __func__);
351 #endif
352 return NULL;
353 }
354 sc->sc_unit = unit;
355 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
356 cv_init(&sc->sc_cv, "raidunit");
357 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
358 return sc;
359 }
360
361 static void
362 raiddestroy(struct raid_softc *sc) {
363 cv_destroy(&sc->sc_cv);
364 mutex_destroy(&sc->sc_mutex);
365 bufq_free(sc->buf_queue);
366 kmem_free(sc, sizeof(*sc));
367 }
368
369 static struct raid_softc *
370 raidget(int unit, bool create) {
371 struct raid_softc *sc;
372 if (unit < 0) {
373 #ifdef DIAGNOSTIC
374 panic("%s: unit %d!", __func__, unit);
375 #endif
376 return NULL;
377 }
378 mutex_enter(&raid_lock);
379 LIST_FOREACH(sc, &raids, sc_link) {
380 if (sc->sc_unit == unit) {
381 mutex_exit(&raid_lock);
382 return sc;
383 }
384 }
385 mutex_exit(&raid_lock);
386 if (!create)
387 return NULL;
388 if ((sc = raidcreate(unit)) == NULL)
389 return NULL;
390 mutex_enter(&raid_lock);
391 LIST_INSERT_HEAD(&raids, sc, sc_link);
392 mutex_exit(&raid_lock);
393 return sc;
394 }
395
/*
 * Unlink `sc' from the global list of raid units and destroy it.
 * The caller is assumed to hold the only remaining reference --
 * not enforced here.
 */
396 static void
397 raidput(struct raid_softc *sc) {
398 mutex_enter(&raid_lock);
399 LIST_REMOVE(sc, sc_link);
400 mutex_exit(&raid_lock);
401 raiddestroy(sc);
402 }
403
/*
 * Legacy pseudo-device attach entry point; `num' is ignored.
 * Presumably kept only to satisfy the config(1) pseudo-device glue.
 */
404 void
405 raidattach(int num)
406 {
407
408 /*
409 * Device attachment and associated initialization now occurs
410 * as part of the module initialization.
411 */
412 }
413
/*
 * Scan the system for RAID components and autoconfigure the complete
 * sets.  Runs at most once (guarded by raidautoconfigdone) and only
 * when raidautoconfig is enabled.  Returns 1 if the scan was
 * performed, 0 if it was skipped.
 */
414 int
415 rf_autoconfig(device_t self)
416 {
417 RF_AutoConfig_t *ac_list;
418 RF_ConfigSet_t *config_sets;
419
420 if (!raidautoconfig || raidautoconfigdone == true)
421 return (0);
422
423 /* XXX This code can only be run once. */
424 raidautoconfigdone = true;
425
426 #ifdef __HAVE_CPU_BOOTCONF
427 /*
428 * 0. find the boot device if needed first so we can use it later
429 * this needs to be done before we autoconfigure any raid sets,
430 * because if we use wedges we are not going to be able to open
431 * the boot device later
432 */
433 if (booted_device == NULL)
434 cpu_bootconf();
435 #endif
436 /* 1. locate all RAID components on the system */
437 aprint_debug("Searching for RAID components...\n");
438 ac_list = rf_find_raid_components();
439
440 /* 2. Sort them into their respective sets. */
441 config_sets = rf_create_auto_sets(ac_list);
442
443 /*
444 * 3. Evaluate each set and configure the valid ones.
445 * This gets done in rf_buildroothack().
446 */
447 rf_buildroothack(config_sets);
448
449 return 1;
450 }
451
/*
 * Return 1 if the RAID set `r' contains the boot device `bdv',
 * 0 otherwise.  Used when deciding whether a raid set may become
 * the root device.
 */
452 static int
453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
454 const char *bootname = device_xname(bdv);
455 size_t len = strlen(bootname);
456
457 for (int col = 0; col < r->numCol; col++) {
458 const char *devname = r->Disks[col].devname;
/* assumes component names start with "/dev/" -- not validated here */
459 devname += sizeof("/dev/") - 1;
/* wedges ("dk*") are translated to their parent device name */
460 if (strncmp(devname, "dk", 2) == 0) {
461 const char *parent =
462 dkwedge_get_parent_name(r->Disks[col].dev);
463 if (parent != NULL)
464 devname = parent;
465 }
/*
 * NOTE(review): this is a prefix match -- a boot device "wd1" would
 * also match a component on "wd10"; confirm this is intended.
 */
466 if (strncmp(devname, bootname, len) == 0) {
467 struct raid_softc *sc = r->softc;
468 aprint_debug("raid%d includes boot device %s\n",
469 sc->sc_unit, devname);
470 return 1;
471 }
472 }
473 return 0;
474 }
475
/*
 * Walk the list of configuration sets, autoconfigure those that are
 * complete and marked for autoconfiguration, and (unless the user
 * hardwired root via rootspec) try to pick one configured set as the
 * root device.  Every set's resources are released/cleaned up here,
 * whether or not it was configured.
 */
476 void
477 rf_buildroothack(RF_ConfigSet_t *config_sets)
478 {
479 RF_ConfigSet_t *cset;
480 RF_ConfigSet_t *next_cset;
481 int num_root;
482 struct raid_softc *sc, *rsc;
483
484 sc = rsc = NULL;
485 num_root = 0;
486 cset = config_sets;
487 while (cset != NULL) {
488 next_cset = cset->next;
489 if (rf_have_enough_components(cset) &&
490 cset->ac->clabel->autoconfigure == 1) {
491 sc = rf_auto_config_set(cset);
492 if (sc != NULL) {
493 aprint_debug("raid%d: configured ok\n",
494 sc->sc_unit);
/* remember the last successfully configured, rootable set */
495 if (cset->rootable) {
496 rsc = sc;
497 num_root++;
498 }
499 } else {
500 /* The autoconfig didn't work :( */
501 aprint_debug("Autoconfig failed\n");
502 rf_release_all_vps(cset);
503 }
504 } else {
505 /* we're not autoconfiguring this set...
506 release the associated resources */
507 rf_release_all_vps(cset);
508 }
509 /* cleanup */
510 rf_cleanup_config_set(cset);
511 cset = next_cset;
512 }
513
514 /* if the user has specified what the root device should be
515 then we don't touch booted_device or boothowto... */
516
517 if (rootspec != NULL)
518 return;
519
520 /* we found something bootable... */
521
522 /*
523 * XXX: The following code assumes that the root raid
524 * is the first ('a') partition. This is about the best
525 * we can do with a BSD disklabel, but we might be able
526 * to do better with a GPT label, by setting a specified
527 * attribute to indicate the root partition. We can then
528 * stash the partition number in the r->root_partition
529 * high bits (the bottom 2 bits are already used). For
530 * now we just set booted_partition to 0 when we override
531 * root.
532 */
533 if (num_root == 1) {
534 device_t candidate_root;
535 if (rsc->sc_dkdev.dk_nwedges != 0) {
536 char cname[sizeof(cset->ac->devname)];
537 /* XXX: assume 'a' */
538 snprintf(cname, sizeof(cname), "%s%c",
539 device_xname(rsc->sc_dev), 'a');
540 candidate_root = dkwedge_find_by_wname(cname);
541 } else
542 candidate_root = rsc->sc_dev;
543 if (booted_device == NULL ||
544 rsc->sc_r.root_partition == 1 ||
545 rf_containsboot(&rsc->sc_r, booted_device)) {
546 booted_device = candidate_root;
547 booted_partition = 0; /* XXX assume 'a' */
548 }
549 } else if (num_root > 1) {
550
551 /*
552 * Maybe the MD code can help. If it cannot, then
553 * setroot() will discover that we have no
554 * booted_device and will ask the user if nothing was
555 * hardwired in the kernel config file
556 */
557 if (booted_device == NULL)
558 return;
559
/* several rootable sets: narrow to those containing the boot device */
560 num_root = 0;
561 mutex_enter(&raid_lock);
562 LIST_FOREACH(sc, &raids, sc_link) {
563 RF_Raid_t *r = &sc->sc_r;
564 if (r->valid == 0)
565 continue;
566
567 if (r->root_partition == 0)
568 continue;
569
570 if (rf_containsboot(r, booted_device)) {
571 num_root++;
572 rsc = sc;
573 }
574 }
575 mutex_exit(&raid_lock);
576
577 if (num_root == 1) {
578 booted_device = rsc->sc_dev;
579 booted_partition = 0; /* XXX assume 'a' */
580 } else {
581 /* we can't guess.. require the user to answer... */
582 boothowto |= RB_ASKNAME;
583 }
584 }
585 }
586
587 static int
588 raidsize(dev_t dev)
589 {
590 struct raid_softc *rs;
591 struct disklabel *lp;
592 int part, unit, omask, size;
593
594 unit = raidunit(dev);
595 if ((rs = raidget(unit, false)) == NULL)
596 return -1;
597 if ((rs->sc_flags & RAIDF_INITED) == 0)
598 return (-1);
599
600 part = DISKPART(dev);
601 omask = rs->sc_dkdev.dk_openmask & (1 << part);
602 lp = rs->sc_dkdev.dk_label;
603
604 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
605 return (-1);
606
607 if (lp->d_partitions[part].p_fstype != FS_SWAP)
608 size = -1;
609 else
610 size = lp->d_partitions[part].p_size *
611 (lp->d_secsize / DEV_BSIZE);
612
613 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
614 return (-1);
615
616 return (size);
617
618 }
619
620 static int
621 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
622 {
623 int unit = raidunit(dev);
624 struct raid_softc *rs;
625 const struct bdevsw *bdev;
626 struct disklabel *lp;
627 RF_Raid_t *raidPtr;
628 daddr_t offset;
629 int part, c, sparecol, j, scol, dumpto;
630 int error = 0;
631
632 if ((rs = raidget(unit, false)) == NULL)
633 return ENXIO;
634
635 raidPtr = &rs->sc_r;
636
637 if ((rs->sc_flags & RAIDF_INITED) == 0)
638 return ENXIO;
639
640 /* we only support dumping to RAID 1 sets */
641 if (raidPtr->Layout.numDataCol != 1 ||
642 raidPtr->Layout.numParityCol != 1)
643 return EINVAL;
644
645 if ((error = raidlock(rs)) != 0)
646 return error;
647
648 if (size % DEV_BSIZE != 0) {
649 error = EINVAL;
650 goto out;
651 }
652
653 if (blkno + size / DEV_BSIZE > rs->sc_size) {
654 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
655 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
656 size / DEV_BSIZE, rs->sc_size);
657 error = EINVAL;
658 goto out;
659 }
660
661 part = DISKPART(dev);
662 lp = rs->sc_dkdev.dk_label;
663 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
664
665 /* figure out what device is alive.. */
666
667 /*
668 Look for a component to dump to. The preference for the
669 component to dump to is as follows:
670 1) the master
671 2) a used_spare of the master
672 3) the slave
673 4) a used_spare of the slave
674 */
675
676 dumpto = -1;
677 for (c = 0; c < raidPtr->numCol; c++) {
678 if (raidPtr->Disks[c].status == rf_ds_optimal) {
679 /* this might be the one */
680 dumpto = c;
681 break;
682 }
683 }
684
685 /*
686 At this point we have possibly selected a live master or a
687 live slave. We now check to see if there is a spared
688 master (or a spared slave), if we didn't find a live master
689 or a live slave.
690 */
691
692 for (c = 0; c < raidPtr->numSpare; c++) {
693 sparecol = raidPtr->numCol + c;
694 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
695 /* How about this one? */
696 scol = -1;
697 for(j=0;j<raidPtr->numCol;j++) {
698 if (raidPtr->Disks[j].spareCol == sparecol) {
699 scol = j;
700 break;
701 }
702 }
703 if (scol == 0) {
704 /*
705 We must have found a spared master!
706 We'll take that over anything else
707 found so far. (We couldn't have
708 found a real master before, since
709 this is a used spare, and it's
710 saying that it's replacing the
711 master.) On reboot (with
712 autoconfiguration turned on)
713 sparecol will become the 1st
714 component (component0) of this set.
715 */
716 dumpto = sparecol;
717 break;
718 } else if (scol != -1) {
719 /*
720 Must be a spared slave. We'll dump
721 to that if we havn't found anything
722 else so far.
723 */
724 if (dumpto == -1)
725 dumpto = sparecol;
726 }
727 }
728 }
729
730 if (dumpto == -1) {
731 /* we couldn't find any live components to dump to!?!?
732 */
733 error = EINVAL;
734 goto out;
735 }
736
737 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
738
739 /*
740 Note that blkno is relative to this particular partition.
741 By adding the offset of this partition in the RAID
742 set, and also adding RF_PROTECTED_SECTORS, we get a
743 value that is relative to the partition used for the
744 underlying component.
745 */
746
747 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
748 blkno + offset, va, size);
749
750 out:
751 raidunlock(rs);
752
753 return error;
754 }
755
/*
 * Open a partition of a raid unit.  Creates the softc on first open,
 * refuses non-raw partitions while wedges exist, reads the disklabel
 * on the first open of an initialized unit, and marks the set dirty
 * on the first open so stale parity is detectable after a crash.
 * Note: the success path also exits through the `bad' label -- it
 * just carries error == 0.
 */
756 /* ARGSUSED */
757 static int
758 raidopen(dev_t dev, int flags, int fmt,
759 struct lwp *l)
760 {
761 int unit = raidunit(dev);
762 struct raid_softc *rs;
763 struct disklabel *lp;
764 int part, pmask;
765 int error = 0;
766
767 if ((rs = raidget(unit, true)) == NULL)
768 return ENXIO;
769 if ((error = raidlock(rs)) != 0)
770 return (error);
771
/* refuse to open a unit that is on its way out */
772 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
773 error = EBUSY;
774 goto bad;
775 }
776
777 lp = rs->sc_dkdev.dk_label;
778
779 part = DISKPART(dev);
780
781 /*
782 * If there are wedges, and this is not RAW_PART, then we
783 * need to fail.
784 */
785 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
786 error = EBUSY;
787 goto bad;
788 }
789 pmask = (1 << part);
790
/* first open of a configured, wedge-less unit: (re)read the label */
791 if ((rs->sc_flags & RAIDF_INITED) &&
792 (rs->sc_dkdev.dk_nwedges == 0) &&
793 (rs->sc_dkdev.dk_openmask == 0))
794 raidgetdisklabel(dev);
795
796 /* make sure that this partition exists */
797
798 if (part != RAW_PART) {
799 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
800 ((part >= lp->d_npartitions) ||
801 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
802 error = ENXIO;
803 goto bad;
804 }
805 }
806 /* Prevent this unit from being unconfigured while open. */
807 switch (fmt) {
808 case S_IFCHR:
809 rs->sc_dkdev.dk_copenmask |= pmask;
810 break;
811
812 case S_IFBLK:
813 rs->sc_dkdev.dk_bopenmask |= pmask;
814 break;
815 }
816
817 if ((rs->sc_dkdev.dk_openmask == 0) &&
818 ((rs->sc_flags & RAIDF_INITED) != 0)) {
819 /* First one... mark things as dirty... Note that we *MUST*
820 have done a configure before this. I DO NOT WANT TO BE
821 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
822 THAT THEY BELONG TOGETHER!!!!! */
823 /* XXX should check to see if we're only open for reading
824 here... If so, we needn't do this, but then need some
825 other way of keeping track of what's happened.. */
826
827 rf_markalldirty(&rs->sc_r);
828 }
829
830
831 rs->sc_dkdev.dk_openmask =
832 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
833
834 bad:
835 raidunlock(rs);
836
837 return (error);
838
839
840 }
841
/*
 * Close a partition of a raid unit.  On the last close of an
 * initialized unit the component labels are marked clean; on the last
 * close of a unit pending shutdown the device is actually detached.
 */
842 /* ARGSUSED */
843 static int
844 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
845 {
846 int unit = raidunit(dev);
847 struct raid_softc *rs;
848 int error = 0;
849 int part;
850
851 if ((rs = raidget(unit, false)) == NULL)
852 return ENXIO;
853
854 if ((error = raidlock(rs)) != 0)
855 return (error);
856
857 part = DISKPART(dev);
858
859 /* ...that much closer to allowing unconfiguration... */
860 switch (fmt) {
861 case S_IFCHR:
862 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
863 break;
864
865 case S_IFBLK:
866 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
867 break;
868 }
869 rs->sc_dkdev.dk_openmask =
870 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
871
872 if ((rs->sc_dkdev.dk_openmask == 0) &&
873 ((rs->sc_flags & RAIDF_INITED) != 0)) {
874 /* Last one... device is not unconfigured yet.
875 Device shutdown has taken care of setting the
876 clean bits if RAIDF_INITED is not set
877 mark things as clean... */
878
879 rf_update_component_labels(&rs->sc_r,
880 RF_FINAL_COMPONENT_UPDATE);
881 }
882 if ((rs->sc_dkdev.dk_openmask == 0) &&
883 ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
884 /*
885 * Detach this raid unit
886 */
887 cfdata_t cf = NULL;
888 int retcode = 0;
889
890 if (rs->sc_dev != NULL) {
891 cf = device_cfdata(rs->sc_dev);
892
/* drop the unit lock before config_detach to avoid deadlock */
893 raidunlock(rs);
894 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
895 if (retcode == 0)
896 /* free the pseudo device attach bits */
897 free(cf, M_RAIDFRAME);
898 } else {
/* no attached device: raidput destroys the softc, so there is
 * nothing left to unlock on this path */
899 raidput(rs);
900 }
901 return retcode;
902 }
903
904 raidunlock(rs);
905 return (0);
906 }
907
/*
 * Block-device strategy routine: validate and bounds-check `bp',
 * then enqueue it on the unit's buffer queue and wake the I/O
 * thread (raidio).  Errors complete the buffer immediately via
 * biodone() with b_resid set to the full count.
 */
908 static void
909 raidstrategy(struct buf *bp)
910 {
911 unsigned int unit = raidunit(bp->b_dev);
912 RF_Raid_t *raidPtr;
913 int wlabel;
914 struct raid_softc *rs;
915
916 if ((rs = raidget(unit, false)) == NULL) {
917 bp->b_error = ENXIO;
918 goto done;
919 }
920 if ((rs->sc_flags & RAIDF_INITED) == 0) {
921 bp->b_error = ENXIO;
922 goto done;
923 }
924 raidPtr = &rs->sc_r;
925 if (!raidPtr->valid) {
926 bp->b_error = ENODEV;
927 goto done;
928 }
/* zero-length transfers complete immediately with no error */
929 if (bp->b_bcount == 0) {
930 db1_printf(("b_bcount is zero..\n"));
931 goto done;
932 }
933
934 /*
935 * Do bounds checking and adjust transfer. If there's an
936 * error, the bounds check will flag that for us.
937 */
938
939 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
940 if (DISKPART(bp->b_dev) == RAW_PART) {
941 uint64_t size; /* device size in DEV_BSIZE unit */
942
/* convert totalSectors (in native sectors) to DEV_BSIZE blocks */
943 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
944 size = raidPtr->totalSectors <<
945 (raidPtr->logBytesPerSector - DEV_BSHIFT);
946 } else {
947 size = raidPtr->totalSectors >>
948 (DEV_BSHIFT - raidPtr->logBytesPerSector);
949 }
950 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
951 goto done;
952 }
953 } else {
954 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
955 db1_printf(("Bounds check failed!!:%d %d\n",
956 (int) bp->b_blkno, (int) wlabel));
957 goto done;
958 }
959 }
960
961 rf_lock_mutex2(raidPtr->iodone_lock);
962
963 bp->b_resid = 0;
964
965 /* stuff it onto our queue */
966 bufq_put(rs->buf_queue, bp);
967
968 /* scheduled the IO to happen at the next convenient time */
969 rf_signal_cond2(raidPtr->iodone_cv);
970 rf_unlock_mutex2(raidPtr->iodone_lock);
971
972 return;
973
974 done:
975 bp->b_resid = bp->b_bcount;
976 biodone(bp);
977 }
978
979 /* ARGSUSED */
980 static int
981 raidread(dev_t dev, struct uio *uio, int flags)
982 {
983 int unit = raidunit(dev);
984 struct raid_softc *rs;
985
986 if ((rs = raidget(unit, false)) == NULL)
987 return ENXIO;
988
989 if ((rs->sc_flags & RAIDF_INITED) == 0)
990 return (ENXIO);
991
992 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
993
994 }
995
996 /* ARGSUSED */
997 static int
998 raidwrite(dev_t dev, struct uio *uio, int flags)
999 {
1000 int unit = raidunit(dev);
1001 struct raid_softc *rs;
1002
1003 if ((rs = raidget(unit, false)) == NULL)
1004 return ENXIO;
1005
1006 if ((rs->sc_flags & RAIDF_INITED) == 0)
1007 return (ENXIO);
1008
1009 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1010
1011 }
1012
/*
 * Shut down an initialized raid unit and detach its disk(9)
 * structures.  Fails with EBUSY while any partition remains open;
 * a unit that was never initialized is a successful no-op.
 * NOTE(review): "unlocked" presumably means the caller already holds
 * the unit lock -- confirm against raid_detach().
 */
1013 static int
1014 raid_detach_unlocked(struct raid_softc *rs)
1015 {
1016 int error;
1017 RF_Raid_t *raidPtr;
1018
1019 raidPtr = &rs->sc_r;
1020
1021 /*
1022 * If somebody has a partition mounted, we shouldn't
1023 * shutdown.
1024 */
1025 if (rs->sc_dkdev.dk_openmask != 0)
1026 return EBUSY;
1027
1028 if ((rs->sc_flags & RAIDF_INITED) == 0)
1029 return 0;
1030
/* NOTE(review): clearing RAIDF_SHUTDOWN here presumably keeps the
 * close path from re-entering the detach logic -- TODO confirm */
1031 rs->sc_flags &= ~RAIDF_SHUTDOWN;
1032
1033 if ((error = rf_Shutdown(raidPtr)) != 0)
1034 return error;
1035
1036 /* Detach the disk. */
1037 dkwedge_delall(&rs->sc_dkdev);
1038 disk_detach(&rs->sc_dkdev);
1039 disk_destroy(&rs->sc_dkdev);
1040
1041 rs->sc_flags &= ~RAIDF_INITED;
1042
/* the softc itself is not freed here -- presumably the caller does */
1043
1044 aprint_normal_dev(rs->sc_dev, "detached\n");
1045
1046 return 0;
1047 }
1048
/*
 * raidioctl: ioctl entry point for the raid(4) pseudo-device.
 *
 * Dispatches the RAIDframe-specific control ioctls (set configuration,
 * component-label access, parity rewrite, reconstruction, copyback,
 * hot-spare management, status queries) and then falls through to the
 * generic disk ioctls (disklabel, wedges, cache sync, bufq strategy).
 *
 * dev  => device (unit and partition) being operated on
 * cmd  => ioctl command code
 * data => in/out argument buffer; layout depends on cmd
 * flag => open flags of the descriptor (FWRITE gates write-type cmds)
 * l    => calling lwp
 *
 * Returns 0 on success, otherwise an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	/* Look up the softc for this unit; do not create it on demand. */
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPARTINFO:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* Translate the old config layout, then share the common
		 * configuration path below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse to shut down while any other partition, or both
		 * the block and character flavor of this one, is open. */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* cf was allocated in raidinit(); free it only if the
		 * detach actually succeeded. */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		/* NOTE(review): clabel is used without a NULL check below --
		 * presumably RF_Malloc() waits and cannot fail here; confirm
		 * against the RF_Malloc definition. */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		/* The copied-in label was only needed for the column;
		 * from here on clabel points at the in-core label. */
		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		/* Stamp a fresh label on every live component. */
		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		/* Copy to a local so we don't depend on the caller's
		 * buffer staying stable. */
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): intentionally a no-op? retcode is still 0
		 * here, so this silently succeeds without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the component state under the mutex before
		 * kicking off a rebuild. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The request outlives this ioctl, so heap-allocate it for
		 * the reconstruction thread. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] directly after the data columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): unlike RAIDFRAME_REWRITEPARITY, the thread
		 * creation status (retcode) is discarded here and 0 is
		 * returned; a failed RF_CREATE_THREAD would also leak
		 * rrcopy.  Confirm whether this is intentional. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		/* Extended status: report raw RU counts instead of a
		 * percentage. */
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (compare RAIDFRAME_SEND_SPARET below);
		 * harmless while this region is compiled out. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* Only the W variants push the label to disk. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		/* Enable or disable writes to the label area. */
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue under splbio, migrating any
		 * pending buffers, then free the old queue. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1967
1968
1969 /* raidinit -- complete the rest of the initialization for the
1970 RAIDframe device. */
1971
1972
1973 static void
1974 raidinit(struct raid_softc *rs)
1975 {
1976 cfdata_t cf;
1977 int unit;
1978 RF_Raid_t *raidPtr = &rs->sc_r;
1979
1980 unit = raidPtr->raidid;
1981
1982
1983 /* XXX should check return code first... */
1984 rs->sc_flags |= RAIDF_INITED;
1985
1986 /* XXX doesn't check bounds. */
1987 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1988
1989 /* attach the pseudo device */
1990 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1991 cf->cf_name = raid_cd.cd_name;
1992 cf->cf_atname = raid_cd.cd_name;
1993 cf->cf_unit = unit;
1994 cf->cf_fstate = FSTATE_STAR;
1995
1996 rs->sc_dev = config_attach_pseudo(cf);
1997
1998 if (rs->sc_dev == NULL) {
1999 printf("raid%d: config_attach_pseudo failed\n",
2000 raidPtr->raidid);
2001 rs->sc_flags &= ~RAIDF_INITED;
2002 free(cf, M_RAIDFRAME);
2003 return;
2004 }
2005
2006 /* disk_attach actually creates space for the CPU disklabel, among
2007 * other things, so it's critical to call this *BEFORE* we try putzing
2008 * with disklabels. */
2009
2010 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2011 disk_attach(&rs->sc_dkdev);
2012
2013 /* XXX There may be a weird interaction here between this, and
2014 * protectedSectors, as used in RAIDframe. */
2015
2016 rs->sc_size = raidPtr->totalSectors;
2017
2018 rf_set_geometry(rs, raidPtr);
2019
2020 dkwedge_discover(&rs->sc_dkdev);
2021
2022 }
2023 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2024 /* wake up the daemon & tell it to get us a spare table
2025 * XXX
2026 * the entries in the queues should be tagged with the raidPtr
2027 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2029 * XXX
2030 *
2031 * XXX This code is not currently used. GO
2032 */
2033 int
2034 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2035 {
2036 int retcode;
2037
2038 rf_lock_mutex2(rf_sparet_wait_mutex);
2039 req->next = rf_sparet_wait_queue;
2040 rf_sparet_wait_queue = req;
2041 rf_broadcast_cond2(rf_sparet_wait_cv);
2042
2043 /* mpsleep unlocks the mutex */
2044 while (!rf_sparet_resp_queue) {
2045 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2046 }
2047 req = rf_sparet_resp_queue;
2048 rf_sparet_resp_queue = req->next;
2049 rf_unlock_mutex2(rf_sparet_wait_mutex);
2050
2051 retcode = req->fcol;
2052 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2053 * alloc'd */
2054 return (retcode);
2055 }
2056 #endif
2057
2058 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2059 * bp & passes it down.
2060 * any calls originating in the kernel must use non-blocking I/O
2061 * do some extra sanity checking to return "appropriate" error values for
2062 * certain conditions (to make some standard utilities work)
2063 *
2064 * Formerly known as: rf_DoAccessKernel
2065 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Label updates must be done without holding the mutex. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: the mutex is held at the top of each iteration
	 * and released before touching the buffer queue or calling
	 * biodone()/rf_DoAccess(). */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb: one extra sector if the byte count is not
		 * sector-aligned (checked and rejected below). */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug printf on
		 * unconditionally -- looks like leftover debugging;
		 * confirm intent. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the set; the extra
		 * (sum < ...) comparisons catch arithmetic wraparound. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this I/O; KernelWakeupFunc is
		 * presumably where it is given back -- confirm. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2183
2184
2185
2186
2187 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2188
/*
 * Dispatch one low-level component I/O described by req to the underlying
 * block device.  Called with the disk queue mutex held; the mutex is
 * dropped and retaken around bdev_strategy(), which may block.
 * Completion is delivered asynchronously via KernelWakeupFunc().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Fake an immediate completion for the NOP. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the transfer to this component. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/*
 * Biodone callback for a component I/O started by rf_DispatchKernelIO().
 * Records the I/O's error status (possibly marking the component as
 * failed), then queues the request on the raidPtr->iodone list and
 * signals the raidio thread to finish processing it.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The request was stashed in b_private by InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* Account the physical I/O time against the access trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2331
2332
2333 /*
2334 * initialize a buf structure for doing an I/O in the kernel.
2335 */
2336 static void
2337 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2338 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2339 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2340 struct proc *b_proc)
2341 {
2342 /* bp->b_flags = B_PHYS | rw_flag; */
2343 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2344 bp->b_oflags = 0;
2345 bp->b_cflags = 0;
2346 bp->b_bcount = numSect << logBytesPerSector;
2347 bp->b_bufsize = bp->b_bcount;
2348 bp->b_error = 0;
2349 bp->b_dev = dev;
2350 bp->b_data = bf;
2351 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2352 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2353 if (bp->b_bcount == 0) {
2354 panic("bp->b_bcount is zero in InitBP!!");
2355 }
2356 bp->b_proc = b_proc;
2357 bp->b_iodone = cbFunc;
2358 bp->b_private = cbArg;
2359 }
2360
2361 static void
2362 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2363 struct disklabel *lp)
2364 {
2365 memset(lp, 0, sizeof(*lp));
2366
2367 /* fabricate a label... */
2368 if (raidPtr->totalSectors > UINT32_MAX)
2369 lp->d_secperunit = UINT32_MAX;
2370 else
2371 lp->d_secperunit = raidPtr->totalSectors;
2372 lp->d_secsize = raidPtr->bytesPerSector;
2373 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2374 lp->d_ntracks = 4 * raidPtr->numCol;
2375 lp->d_ncylinders = raidPtr->totalSectors /
2376 (lp->d_nsectors * lp->d_ntracks);
2377 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2378
2379 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2380 lp->d_type = DKTYPE_RAID;
2381 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2382 lp->d_rpm = 3600;
2383 lp->d_interleave = 1;
2384 lp->d_flags = 0;
2385
2386 lp->d_partitions[RAW_PART].p_offset = 0;
2387 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2388 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2389 lp->d_npartitions = RAW_PART + 1;
2390
2391 lp->d_magic = DISKMAGIC;
2392 lp->d_magic2 = DISKMAGIC;
2393 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2394
2395 }
2396 /*
2397 * Read the disklabel from the raid device. If one is not present, fake one
2398 * up.
2399 */
/*
 * Load the disklabel for the raid device: start from a fabricated
 * default, then try to read a real label from disk.  If none is found,
 * fake one up; if one is found, sanity-check it against the current
 * size of the RAID set (which can change with interleave even when the
 * same components are used).
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	/* do not create the softc if it does not already exist */
	if ((rs = raidget(unit, false)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* start with sane defaults in case no label can be read */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX; only warn on
		 * mismatch that the 32-bit field can actually express */
		if (lp->d_secperunit < UINT32_MAX ?
		    lp->d_secperunit != rs->sc_size :
		    lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}
	}

}
2462 /*
2463 * Take care of things one might want to take care of in the event
2464 * that a disklabel isn't present.
2465 */
2466 static void
2467 raidmakedisklabel(struct raid_softc *rs)
2468 {
2469 struct disklabel *lp = rs->sc_dkdev.dk_label;
2470 db1_printf(("Making a label..\n"));
2471
2472 /*
2473 * For historical reasons, if there's no disklabel present
2474 * the raw partition must be marked FS_BSDFFS.
2475 */
2476
2477 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2478
2479 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2480
2481 lp->d_checksum = dkcksum(lp);
2482 }
2483 /*
2484 * Wait interruptibly for an exclusive lock.
2485 *
2486 * XXX
2487 * Several drivers do this; it should be abstracted and made MP-safe.
2488 * (Hmm... where have we seen this warning before :-> GO )
2489 */
2490 static int
2491 raidlock(struct raid_softc *rs)
2492 {
2493 int error;
2494
2495 mutex_enter(&rs->sc_mutex);
2496 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2497 rs->sc_flags |= RAIDF_WANTED;
2498 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2499 if (error != 0)
2500 return (error);
2501 }
2502 rs->sc_flags |= RAIDF_LOCKED;
2503 mutex_exit(&rs->sc_mutex);
2504 return (0);
2505 }
2506 /*
2507 * Unlock and wake up any waiters.
2508 */
2509 static void
2510 raidunlock(struct raid_softc *rs)
2511 {
2512
2513 mutex_enter(&rs->sc_mutex);
2514 rs->sc_flags &= ~RAIDF_LOCKED;
2515 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2516 rs->sc_flags &= ~RAIDF_WANTED;
2517 cv_broadcast(&rs->sc_cv);
2518 }
2519 mutex_exit(&rs->sc_mutex);
2520 }
2521
2522
2523 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2524 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2525 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2526
2527 static daddr_t
2528 rf_component_info_offset(void)
2529 {
2530
2531 return RF_COMPONENT_INFO_OFFSET;
2532 }
2533
2534 static daddr_t
2535 rf_component_info_size(unsigned secsize)
2536 {
2537 daddr_t info_size;
2538
2539 KASSERT(secsize);
2540 if (secsize > RF_COMPONENT_INFO_SIZE)
2541 info_size = secsize;
2542 else
2543 info_size = RF_COMPONENT_INFO_SIZE;
2544
2545 return info_size;
2546 }
2547
2548 static daddr_t
2549 rf_parity_map_offset(RF_Raid_t *raidPtr)
2550 {
2551 daddr_t map_offset;
2552
2553 KASSERT(raidPtr->bytesPerSector);
2554 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2555 map_offset = raidPtr->bytesPerSector;
2556 else
2557 map_offset = RF_COMPONENT_INFO_SIZE;
2558 map_offset += rf_component_info_offset();
2559
2560 return map_offset;
2561 }
2562
2563 static daddr_t
2564 rf_parity_map_size(RF_Raid_t *raidPtr)
2565 {
2566 daddr_t map_size;
2567
2568 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2569 map_size = raidPtr->bytesPerSector;
2570 else
2571 map_size = RF_PARITY_MAP_SIZE;
2572
2573 return map_size;
2574 }
2575
2576 int
2577 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2578 {
2579 RF_ComponentLabel_t *clabel;
2580
2581 clabel = raidget_component_label(raidPtr, col);
2582 clabel->clean = RF_RAID_CLEAN;
2583 raidflush_component_label(raidPtr, col);
2584 return(0);
2585 }
2586
2587
2588 int
2589 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2590 {
2591 RF_ComponentLabel_t *clabel;
2592
2593 clabel = raidget_component_label(raidPtr, col);
2594 clabel->clean = RF_RAID_DIRTY;
2595 raidflush_component_label(raidPtr, col);
2596 return(0);
2597 }
2598
/*
 * Read the on-disk component label of column col into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns the error from
 * raidread_component_label().
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2608
/*
 * Return a pointer to the in-core component label for column col.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2614
/*
 * Write the in-core component label of column col out to disk,
 * stamping it with the current mod_counter first.  Returns the error
 * from raidwrite_component_label().
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's mod counter in lock step */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2629
2630
/*
 * Read a component label from the standard label area of the given
 * component device.  Thin wrapper around raidread_component_area().
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2640
2641 /* ARGSUSED */
/*
 * Read dsize bytes starting at byte offset `offset' from the raw
 * component device and copy the first msize bytes into *data.
 * Performs a synchronous read via a scratch buffer.  Returns 0 on
 * success or an errno from biowait().
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
 	bp->b_resid = dsize;

	/* synchronous read: dispatch and wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	/* release the scratch buffer in either case */
	brelse(bp, 0);
	return(error);
}
2678
2679
/*
 * Write a component label to the standard label area of the given
 * component device.  Thin synchronous wrapper around
 * raidwrite_component_area().
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2689
2690 /* ARGSUSED */
/*
 * Write msize bytes from *data (zero-padded to dsize) at byte offset
 * `offset' on the raw component device.  If asyncp is set the write is
 * fired off B_ASYNC and 0 is returned immediately; otherwise we wait
 * for completion and return the errno from biowait().
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* pad the tail of the area with zeros before copying the data in */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): no brelse() here -- presumably the buffer
		 * is reclaimed on async completion; verify */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
2724
2725 void
2726 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2727 {
2728 int c;
2729
2730 for (c = 0; c < raidPtr->numCol; c++) {
2731 /* Skip dead disks. */
2732 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2733 continue;
2734 /* XXXjld: what if an error occurs here? */
2735 raidwrite_component_area(raidPtr->Disks[c].dev,
2736 raidPtr->raid_cinfo[c].ci_vp, map,
2737 RF_PARITYMAP_NBYTE,
2738 rf_parity_map_offset(raidPtr),
2739 rf_parity_map_size(raidPtr), 0);
2740 }
2741 }
2742
2743 void
2744 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2745 {
2746 struct rf_paritymap_ondisk tmp;
2747 int c,first;
2748
2749 first=1;
2750 for (c = 0; c < raidPtr->numCol; c++) {
2751 /* Skip dead disks. */
2752 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2753 continue;
2754 raidread_component_area(raidPtr->Disks[c].dev,
2755 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2756 RF_PARITYMAP_NBYTE,
2757 rf_parity_map_offset(raidPtr),
2758 rf_parity_map_size(raidPtr));
2759 if (first) {
2760 memcpy(map, &tmp, sizeof(*map));
2761 first = 0;
2762 } else {
2763 rf_paritymap_merge(map, &tmp);
2764 }
2765 }
2766 }
2767
/*
 * Bump the mod counter and mark the component labels of all live
 * components (and in-use spares) as dirty on disk.  Called when the
 * set transitions to a state where parity may become stale.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			/* NOTE(review): if no column claims this spare,
			 * scol keeps its previous value (initially -1);
			 * verify this cannot happen for a used spare */
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2827
2828
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the mod counter, record current status/unit, and (when
 * this is the final update and parity is known good) mark them clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			/* NOTE(review): scol stays -1 if no column claims
			 * this spare; verify that cannot happen here */
			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2903
2904 void
2905 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2906 {
2907
2908 if (vp != NULL) {
2909 if (auto_configured == 1) {
2910 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2911 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2912 vput(vp);
2913
2914 } else {
2915 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2916 }
2917 }
2918 }
2919
2920
2921 void
2922 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2923 {
2924 int r,c;
2925 struct vnode *vp;
2926 int acd;
2927
2928
2929 /* We take this opportunity to close the vnodes like we should.. */
2930
2931 for (c = 0; c < raidPtr->numCol; c++) {
2932 vp = raidPtr->raid_cinfo[c].ci_vp;
2933 acd = raidPtr->Disks[c].auto_configured;
2934 rf_close_component(raidPtr, vp, acd);
2935 raidPtr->raid_cinfo[c].ci_vp = NULL;
2936 raidPtr->Disks[c].auto_configured = 0;
2937 }
2938
2939 for (r = 0; r < raidPtr->numSpare; r++) {
2940 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2941 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2942 rf_close_component(raidPtr, vp, acd);
2943 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2944 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2945 }
2946 }
2947
2948
/*
 * Kernel thread body: fail the component named in req and (optionally,
 * per RF_FDFLAGS_RECON) reconstruct it to a spare.  Frees req and
 * exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our caller; we own and free it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2970
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * in-core parity state is marked clean (the on-disk clean bits are set
 * at shutdown).  Wakes any thread waiting in shutdown, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3001
3002
/*
 * Kernel thread body: copy reconstructed data from spares back to
 * their original (replaced) components, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3017
3018
/*
 * Kernel thread body: reconstruct a failed component in place (onto
 * the same disk slot), free the request, and exit.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was allocated by our caller; we own and free it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3036
/*
 * Try to read a component label from the open device (dev/vp).  If a
 * reasonable label is found, prepend a new RF_AutoConfig_t to ac_list
 * and return the new list head (the vnode is kept open, owned by the
 * list entry).  Otherwise the vnode is closed and ac_list is returned
 * unchanged.  On out-of-memory the entire list is freed and NULL is
 * returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* free everything accumulated so far, labels included */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* prepend to the list; clabel now owned by ac */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3094
3095 RF_AutoConfig_t *
3096 rf_find_raid_components(void)
3097 {
3098 struct vnode *vp;
3099 struct disklabel label;
3100 device_t dv;
3101 deviter_t di;
3102 dev_t dev;
3103 int bmajor, bminor, wedge, rf_part_found;
3104 int error;
3105 int i;
3106 RF_AutoConfig_t *ac_list;
3107 uint64_t numsecs;
3108 unsigned secsize;
3109
3110 /* initialize the AutoConfig list */
3111 ac_list = NULL;
3112
3113 /* we begin by trolling through *all* the devices on the system */
3114
3115 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3116 dv = deviter_next(&di)) {
3117
3118 /* we are only interested in disks... */
3119 if (device_class(dv) != DV_DISK)
3120 continue;
3121
3122 /* we don't care about floppies... */
3123 if (device_is_a(dv, "fd")) {
3124 continue;
3125 }
3126
3127 /* we don't care about CD's... */
3128 if (device_is_a(dv, "cd")) {
3129 continue;
3130 }
3131
3132 /* we don't care about md's... */
3133 if (device_is_a(dv, "md")) {
3134 continue;
3135 }
3136
3137 /* hdfd is the Atari/Hades floppy driver */
3138 if (device_is_a(dv, "hdfd")) {
3139 continue;
3140 }
3141
3142 /* fdisa is the Atari/Milan floppy driver */
3143 if (device_is_a(dv, "fdisa")) {
3144 continue;
3145 }
3146
3147 /* need to find the device_name_to_block_device_major stuff */
3148 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3149
3150 rf_part_found = 0; /*No raid partition as yet*/
3151
3152 /* get a vnode for the raw partition of this disk */
3153
3154 wedge = device_is_a(dv, "dk");
3155 bminor = minor(device_unit(dv));
3156 dev = wedge ? makedev(bmajor, bminor) :
3157 MAKEDISKDEV(bmajor, bminor, RAW_PART);
3158 if (bdevvp(dev, &vp))
3159 panic("RAID can't alloc vnode");
3160
3161 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
3162
3163 if (error) {
3164 /* "Who cares." Continue looking
3165 for something that exists*/
3166 vput(vp);
3167 continue;
3168 }
3169
3170 error = getdisksize(vp, &numsecs, &secsize);
3171 if (error) {
3172 vput(vp);
3173 continue;
3174 }
3175 if (wedge) {
3176 struct dkwedge_info dkw;
3177 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3178 NOCRED);
3179 if (error) {
3180 printf("RAIDframe: can't get wedge info for "
3181 "dev %s (%d)\n", device_xname(dv), error);
3182 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3183 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3184 vput(vp);
3185 continue;
3186 }
3187
3188 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3189 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3190 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3191 vput(vp);
3192 continue;
3193 }
3194
3195 ac_list = rf_get_component(ac_list, dev, vp,
3196 device_xname(dv), dkw.dkw_size, numsecs, secsize);
3197 rf_part_found = 1; /*There is a raid component on this disk*/
3198 continue;
3199 }
3200
3201 /* Ok, the disk exists. Go get the disklabel. */
3202 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3203 if (error) {
3204 /*
3205 * XXX can't happen - open() would
3206 * have errored out (or faked up one)
3207 */
3208 if (error != ENOTTY)
3209 printf("RAIDframe: can't get label for dev "
3210 "%s (%d)\n", device_xname(dv), error);
3211 }
3212
3213 /* don't need this any more. We'll allocate it again
3214 a little later if we really do... */
3215 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3216 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3217 vput(vp);
3218
3219 if (error)
3220 continue;
3221
3222 rf_part_found = 0; /*No raid partitions yet*/
3223 for (i = 0; i < label.d_npartitions; i++) {
3224 char cname[sizeof(ac_list->devname)];
3225
3226 /* We only support partitions marked as RAID */
3227 if (label.d_partitions[i].p_fstype != FS_RAID)
3228 continue;
3229
3230 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3231 if (bdevvp(dev, &vp))
3232 panic("RAID can't alloc vnode");
3233
3234 error = VOP_OPEN(vp, FREAD, NOCRED);
3235 if (error) {
3236 /* Whatever... */
3237 vput(vp);
3238 continue;
3239 }
3240 snprintf(cname, sizeof(cname), "%s%c",
3241 device_xname(dv), 'a' + i);
3242 ac_list = rf_get_component(ac_list, dev, vp, cname,
3243 label.d_partitions[i].p_size, numsecs, secsize);
3244 rf_part_found = 1; /*There is at least one raid partition on this disk*/
3245 }
3246
3247 /*
3248 *If there is no raid component on this disk, either in a
3249 *disklabel or inside a wedge, check the raw partition as well,
3250 *as it is possible to configure raid components on raw disk
3251 *devices.
3252 */
3253
3254 if (!rf_part_found) {
3255 char cname[sizeof(ac_list->devname)];
3256
3257 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3258 if (bdevvp(dev, &vp))
3259 panic("RAID can't alloc vnode");
3260
3261 error = VOP_OPEN(vp, FREAD, NOCRED);
3262 if (error) {
3263 /* Whatever... */
3264 vput(vp);
3265 continue;
3266 }
3267 snprintf(cname, sizeof(cname), "%s%c",
3268 device_xname(dv), 'a' + RAW_PART);
3269 ac_list = rf_get_component(ac_list, dev, vp, cname,
3270 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3271 }
3272 }
3273 deviter_release(&di);
3274 return ac_list;
3275 }
3276
3277
3278 int
3279 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3280 {
3281
3282 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3283 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3284 ((clabel->clean == RF_RAID_CLEAN) ||
3285 (clabel->clean == RF_RAID_DIRTY)) &&
3286 clabel->row >=0 &&
3287 clabel->column >= 0 &&
3288 clabel->num_rows > 0 &&
3289 clabel->num_columns > 0 &&
3290 clabel->row < clabel->num_rows &&
3291 clabel->column < clabel->num_columns &&
3292 clabel->blockSize > 0 &&
3293 /*
3294 * numBlocksHi may contain garbage, but it is ok since
3295 * the type is unsigned. If it is really garbage,
3296 * rf_fix_old_label_size() will fix it.
3297 */
3298 rf_component_label_numblocks(clabel) > 0) {
3299 /*
3300 * label looks reasonable enough...
3301 * let's make sure it has no old garbage.
3302 */
3303 if (numsecs)
3304 rf_fix_old_label_size(clabel, numsecs);
3305 return(1);
3306 }
3307 return(0);
3308 }
3309
3310
3311 /*
3312 * For reasons yet unknown, some old component labels have garbage in
3313 * the newer numBlocksHi region, and this causes lossage. Since those
3314 * disks will also have numsecs set to less than 32 bits of sectors,
3315 * we can determine when this corruption has occurred, and fix it.
3316 *
3317 * The exact same problem, with the same unknown reason, happens to
3318 * the partitionSizeHi member as well.
3319 */
3320 static void
3321 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3322 {
3323
3324 if (numsecs < ((uint64_t)1 << 32)) {
3325 if (clabel->numBlocksHi) {
3326 printf("WARNING: total sectors < 32 bits, yet "
3327 "numBlocksHi set\n"
3328 "WARNING: resetting numBlocksHi to zero.\n");
3329 clabel->numBlocksHi = 0;
3330 }
3331
3332 if (clabel->partitionSizeHi) {
3333 printf("WARNING: total sectors < 32 bits, yet "
3334 "partitionSizeHi set\n"
3335 "WARNING: resetting partitionSizeHi to zero.\n");
3336 clabel->partitionSizeHi = 0;
3337 }
3338 }
3339 }
3340
3341
#ifdef DEBUG
/* Dump a component label to the console, one field group per line. */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};
	uint64_t nblk;

	nblk = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, nblk);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* root_partition is masked to index safely into rp[]. */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
3375
3376 RF_ConfigSet_t *
3377 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3378 {
3379 RF_AutoConfig_t *ac;
3380 RF_ConfigSet_t *config_sets;
3381 RF_ConfigSet_t *cset;
3382 RF_AutoConfig_t *ac_next;
3383
3384
3385 config_sets = NULL;
3386
3387 /* Go through the AutoConfig list, and figure out which components
3388 belong to what sets. */
3389 ac = ac_list;
3390 while(ac!=NULL) {
3391 /* we're going to putz with ac->next, so save it here
3392 for use at the end of the loop */
3393 ac_next = ac->next;
3394
3395 if (config_sets == NULL) {
3396 /* will need at least this one... */
3397 config_sets = (RF_ConfigSet_t *)
3398 malloc(sizeof(RF_ConfigSet_t),
3399 M_RAIDFRAME, M_NOWAIT);
3400 if (config_sets == NULL) {
3401 panic("rf_create_auto_sets: No memory!");
3402 }
3403 /* this one is easy :) */
3404 config_sets->ac = ac;
3405 config_sets->next = NULL;
3406 config_sets->rootable = 0;
3407 ac->next = NULL;
3408 } else {
3409 /* which set does this component fit into? */
3410 cset = config_sets;
3411 while(cset!=NULL) {
3412 if (rf_does_it_fit(cset, ac)) {
3413 /* looks like it matches... */
3414 ac->next = cset->ac;
3415 cset->ac = ac;
3416 break;
3417 }
3418 cset = cset->next;
3419 }
3420 if (cset==NULL) {
3421 /* didn't find a match above... new set..*/
3422 cset = (RF_ConfigSet_t *)
3423 malloc(sizeof(RF_ConfigSet_t),
3424 M_RAIDFRAME, M_NOWAIT);
3425 if (cset == NULL) {
3426 panic("rf_create_auto_sets: No memory!");
3427 }
3428 cset->ac = ac;
3429 ac->next = NULL;
3430 cset->next = config_sets;
3431 cset->rootable = 0;
3432 config_sets = cset;
3433 }
3434 }
3435 ac = ac_next;
3436 }
3437
3438
3439 return(config_sets);
3440 }
3441
3442 static int
3443 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3444 {
3445 RF_ComponentLabel_t *clabel1, *clabel2;
3446
3447 /* If this one matches the *first* one in the set, that's good
3448 enough, since the other members of the set would have been
3449 through here too... */
3450 /* note that we are not checking partitionSize here..
3451
3452 Note that we are also not checking the mod_counters here.
3453 If everything else matches except the mod_counter, that's
3454 good enough for this test. We will deal with the mod_counters
3455 a little later in the autoconfiguration process.
3456
3457 (clabel1->mod_counter == clabel2->mod_counter) &&
3458
3459 The reason we don't check for this is that failed disks
3460 will have lower modification counts. If those disks are
3461 not added to the set they used to belong to, then they will
3462 form their own set, which may result in 2 different sets,
3463 for example, competing to be configured at raid0, and
3464 perhaps competing to be the root filesystem set. If the
3465 wrong ones get configured, or both attempt to become /,
3466 weird behaviour and or serious lossage will occur. Thus we
3467 need to bring them into the fold here, and kick them out at
3468 a later point.
3469
3470 */
3471
3472 clabel1 = cset->ac->clabel;
3473 clabel2 = ac->clabel;
3474 if ((clabel1->version == clabel2->version) &&
3475 (clabel1->serial_number == clabel2->serial_number) &&
3476 (clabel1->num_rows == clabel2->num_rows) &&
3477 (clabel1->num_columns == clabel2->num_columns) &&
3478 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3479 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3480 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3481 (clabel1->parityConfig == clabel2->parityConfig) &&
3482 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3483 (clabel1->blockSize == clabel2->blockSize) &&
3484 rf_component_label_numblocks(clabel1) ==
3485 rf_component_label_numblocks(clabel2) &&
3486 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3487 (clabel1->root_partition == clabel2->root_partition) &&
3488 (clabel1->last_unit == clabel2->last_unit) &&
3489 (clabel1->config_order == clabel2->config_order)) {
3490 /* if it get's here, it almost *has* to be a match */
3491 } else {
3492 /* it's not consistent with somebody in the set..
3493 punt */
3494 return(0);
3495 }
3496 /* all was fine.. it must fit... */
3497 return(1);
3498 }
3499
/*
 * Decide whether config set cset has enough live components to be
 * worth configuring.  Returns 1 if the set can be configured, 0 if too
 * many components are missing for the set's RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The set's mod_counter is the maximum over all members; stale
	   (failed) components carry lower values and are detected below
	   by failing to match this maximum. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each column, look for a member whose label claims that
	   column AND carries the current mod_counter. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				/* RAID 1 components pair up as
				   (0,1), (2,3), ...; the set survives
				   any number of failures as long as
				   no pair loses both halves. */
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror pair
			   without bailing out above.. reset the
			   even_pair_failed flag, and go on to the
			   next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   exactly one.  (RAID 1 was fully handled above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3602
3603 void
3604 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3605 RF_Raid_t *raidPtr)
3606 {
3607 RF_ComponentLabel_t *clabel;
3608 int i;
3609
3610 clabel = ac->clabel;
3611
3612 /* 1. Fill in the common stuff */
3613 config->numRow = clabel->num_rows = 1;
3614 config->numCol = clabel->num_columns;
3615 config->numSpare = 0; /* XXX should this be set here? */
3616 config->sectPerSU = clabel->sectPerSU;
3617 config->SUsPerPU = clabel->SUsPerPU;
3618 config->SUsPerRU = clabel->SUsPerRU;
3619 config->parityConfig = clabel->parityConfig;
3620 /* XXX... */
3621 strcpy(config->diskQueueType,"fifo");
3622 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3623 config->layoutSpecificSize = 0; /* XXX ?? */
3624
3625 while(ac!=NULL) {
3626 /* row/col values will be in range due to the checks
3627 in reasonable_label() */
3628 strcpy(config->devnames[0][ac->clabel->column],
3629 ac->devname);
3630 ac = ac->next;
3631 }
3632
3633 for(i=0;i<RF_MAXDBGV;i++) {
3634 config->debugVars[i][0] = 0;
3635 }
3636 }
3637
3638 int
3639 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3640 {
3641 RF_ComponentLabel_t *clabel;
3642 int column;
3643 int sparecol;
3644
3645 raidPtr->autoconfigure = new_value;
3646
3647 for(column=0; column<raidPtr->numCol; column++) {
3648 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3649 clabel = raidget_component_label(raidPtr, column);
3650 clabel->autoconfigure = new_value;
3651 raidflush_component_label(raidPtr, column);
3652 }
3653 }
3654 for(column = 0; column < raidPtr->numSpare ; column++) {
3655 sparecol = raidPtr->numCol + column;
3656 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3657 clabel = raidget_component_label(raidPtr, sparecol);
3658 clabel->autoconfigure = new_value;
3659 raidflush_component_label(raidPtr, sparecol);
3660 }
3661 }
3662 return(new_value);
3663 }
3664
3665 int
3666 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3667 {
3668 RF_ComponentLabel_t *clabel;
3669 int column;
3670 int sparecol;
3671
3672 raidPtr->root_partition = new_value;
3673 for(column=0; column<raidPtr->numCol; column++) {
3674 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3675 clabel = raidget_component_label(raidPtr, column);
3676 clabel->root_partition = new_value;
3677 raidflush_component_label(raidPtr, column);
3678 }
3679 }
3680 for(column = 0; column < raidPtr->numSpare ; column++) {
3681 sparecol = raidPtr->numCol + column;
3682 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3683 clabel = raidget_component_label(raidPtr, sparecol);
3684 clabel->root_partition = new_value;
3685 raidflush_component_label(raidPtr, sparecol);
3686 }
3687 }
3688 return(new_value);
3689 }
3690
3691 void
3692 rf_release_all_vps(RF_ConfigSet_t *cset)
3693 {
3694 RF_AutoConfig_t *ac;
3695
3696 ac = cset->ac;
3697 while(ac!=NULL) {
3698 /* Close the vp, and give it back */
3699 if (ac->vp) {
3700 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3701 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3702 vput(ac->vp);
3703 ac->vp = NULL;
3704 }
3705 ac = ac->next;
3706 }
3707 }
3708
3709
3710 void
3711 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3712 {
3713 RF_AutoConfig_t *ac;
3714 RF_AutoConfig_t *next_ac;
3715
3716 ac = cset->ac;
3717 while(ac!=NULL) {
3718 next_ac = ac->next;
3719 /* nuke the label */
3720 free(ac->clabel, M_RAIDFRAME);
3721 /* cleanup the config structure */
3722 free(ac, M_RAIDFRAME);
3723 /* "next.." */
3724 ac = next_ac;
3725 }
3726 /* and, finally, nuke the config set */
3727 free(cset, M_RAIDFRAME);
3728 }
3729
3730
3731 void
3732 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3733 {
3734 /* current version number */
3735 clabel->version = RF_COMPONENT_LABEL_VERSION;
3736 clabel->serial_number = raidPtr->serial_number;
3737 clabel->mod_counter = raidPtr->mod_counter;
3738
3739 clabel->num_rows = 1;
3740 clabel->num_columns = raidPtr->numCol;
3741 clabel->clean = RF_RAID_DIRTY; /* not clean */
3742 clabel->status = rf_ds_optimal; /* "It's good!" */
3743
3744 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3745 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3746 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3747
3748 clabel->blockSize = raidPtr->bytesPerSector;
3749 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3750
3751 /* XXX not portable */
3752 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3753 clabel->maxOutstanding = raidPtr->maxOutstanding;
3754 clabel->autoconfigure = raidPtr->autoconfigure;
3755 clabel->root_partition = raidPtr->root_partition;
3756 clabel->last_unit = raidPtr->raidid;
3757 clabel->config_order = raidPtr->config_order;
3758
3759 #ifndef RF_NO_PARITY_MAP
3760 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3761 #endif
3762 }
3763
/*
 * Attempt to configure the RAID set described by cset.  On success,
 * returns the configured softc (with the set marked rootable if its
 * labels request it); on failure returns NULL.  The caller retains
 * ownership of cset.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	/* Start at the unit recorded in the label; if that softc is
	   already a configured ("valid") array, walk upward to the
	   first unit that is free or nonexistent. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc exists at that unit yet; create one. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* Configuration succeeded: attach the disk and mark
		   parity as needing a check. */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; release the softc we claimed. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3847
3848 void
3849 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3850 {
3851 struct buf *bp;
3852 struct raid_softc *rs;
3853
3854 bp = (struct buf *)desc->bp;
3855 rs = desc->raidPtr->softc;
3856 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3857 (bp->b_flags & B_READ));
3858 }
3859
/*
 * Initialize a pool for RAIDframe use: create it at IPL_BIO, cap it
 * at xmax items, and pre-allocate xmin items so that allocations up
 * to the low-water mark cannot fail at interrupt time.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3869
3870 /*
3871 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3872 * if there is IO pending and if that IO could possibly be done for a
3873 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3874 * otherwise.
3875 *
3876 */
3877
3878 int
3879 rf_buf_queue_check(RF_Raid_t *raidPtr)
3880 {
3881 struct raid_softc *rs = raidPtr->softc;
3882 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3883 /* there is work to do */
3884 return 0;
3885 }
3886 /* default is nothing to do */
3887 return 1;
3888 }
3889
3890 int
3891 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3892 {
3893 uint64_t numsecs;
3894 unsigned secsize;
3895 int error;
3896
3897 error = getdisksize(vp, &numsecs, &secsize);
3898 if (error == 0) {
3899 diskPtr->blockSize = secsize;
3900 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3901 diskPtr->partitionSize = numsecs;
3902 return 0;
3903 }
3904 return error;
3905 }
3906
/* Autoconf match: raid pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3912
/*
 * Autoconf attach: intentionally empty -- no per-device state is set
 * up here (presumably the softc is managed via raidget/raidput
 * elsewhere in this file; confirm before adding work here).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3918
3919
3920 static int
3921 raid_detach(device_t self, int flags)
3922 {
3923 int error;
3924 struct raid_softc *rs = raidget(device_unit(self), false);
3925
3926 if (rs == NULL)
3927 return ENXIO;
3928
3929 if ((error = raidlock(rs)) != 0)
3930 return (error);
3931
3932 error = raid_detach_unlocked(rs);
3933
3934 raidunlock(rs);
3935
3936 /* XXX raid can be referenced here */
3937
3938 if (error)
3939 return error;
3940
3941 /* Free the softc */
3942 raidput(rs);
3943
3944 return 0;
3945 }
3946
3947 static void
3948 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3949 {
3950 struct disk_geom *dg = &rs->sc_dkdev.dk_geom;
3951
3952 memset(dg, 0, sizeof(*dg));
3953
3954 dg->dg_secperunit = raidPtr->totalSectors;
3955 dg->dg_secsize = raidPtr->bytesPerSector;
3956 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3957 dg->dg_ntracks = 4 * raidPtr->numCol;
3958
3959 disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
3960 }
3961
3962 /*
3963 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3964 * We end up returning whatever error was returned by the first cache flush
3965 * that fails.
3966 */
3967
3968 int
3969 rf_sync_component_caches(RF_Raid_t *raidPtr)
3970 {
3971 int c, sparecol;
3972 int e,error;
3973 int force = 1;
3974
3975 error = 0;
3976 for (c = 0; c < raidPtr->numCol; c++) {
3977 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3978 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3979 &force, FWRITE, NOCRED);
3980 if (e) {
3981 if (e != ENODEV)
3982 printf("raid%d: cache flush to component %s failed.\n",
3983 raidPtr->raidid, raidPtr->Disks[c].devname);
3984 if (error == 0) {
3985 error = e;
3986 }
3987 }
3988 }
3989 }
3990
3991 for( c = 0; c < raidPtr->numSpare ; c++) {
3992 sparecol = raidPtr->numCol + c;
3993 /* Need to ensure that the reconstruct actually completed! */
3994 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3995 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3996 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3997 if (e) {
3998 if (e != ENODEV)
3999 printf("raid%d: cache flush to component %s failed.\n",
4000 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
4001 if (error == 0) {
4002 error = e;
4003 }
4004 }
4005 }
4006 }
4007 return error;
4008 }
4009
4010 /*
4011 * Module interface
4012 */
4013
4014 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
4015
4016 #ifdef _MODULE
4017 CFDRIVER_DECL(raid, DV_DISK, NULL);
4018 #endif
4019
4020 static int raid_modcmd(modcmd_t, void *);
4021 static int raid_modcmd_init(void);
4022 static int raid_modcmd_fini(void);
4023
4024 static int
4025 raid_modcmd(modcmd_t cmd, void *data)
4026 {
4027 int error;
4028
4029 error = 0;
4030 switch (cmd) {
4031 case MODULE_CMD_INIT:
4032 error = raid_modcmd_init();
4033 break;
4034 case MODULE_CMD_FINI:
4035 error = raid_modcmd_fini();
4036 break;
4037 default:
4038 error = ENOTTY;
4039 break;
4040 }
4041 return error;
4042 }
4043
/*
 * Module load: set up locks, attach the device switch and autoconf
 * glue, boot the RAIDframe core, and register the autoconfig
 * finalizer.  Each failure path unwinds exactly the steps that have
 * already succeeded, in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Sparing-table request/response synchronization. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Attach block/character device switches; let the kernel pick
	   the major numbers.  EEXIST means they are already attached
	   (e.g. statically configured), which is fine. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	/* Module builds must register the cfdriver by hand. */
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind: cfdriver (module only), then devsw. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	if (error == 0) {
		/* Bring up the RAIDframe core itself. */
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Not fatal: autoconfiguration simply won't happen. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
4114
/*
 * Module unload: refuse while any raid device exists, otherwise tear
 * down the autoconf glue, device switches, and the RAIDframe core in
 * the reverse of the order raid_modcmd_init() set them up.  On a
 * partial failure, the already-detached pieces are re-attached so the
 * module remains usable.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		/* Roll back the cfattach detach above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		/* Roll back the autoconf detaches above. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core and destroy the locks. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4161