rf_netbsdkintf.c revision 1.307 1 /* $NetBSD: rf_netbsdkintf.c,v 1.307 2014/04/03 15:30:52 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.307 2014/04/03 15:30:52 christos Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
#ifdef DEBUG
int     rf_kdebug_level = 0;
/*
 * Debug printf: emits only when rf_kdebug_level > 0.  The argument is a
 * parenthesized printf argument list, e.g. db1_printf(("x=%d\n", x)).
 *
 * Both variants are wrapped in do { } while (0) so the macro behaves as a
 * single statement: the old bare "if" form silently captured a following
 * "else", and the old empty "{ }" form was a syntax error when used as
 * "if (c) db1_printf(...); else ...".
 */
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
160
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* Synchronization for the spare-table installation handshake below. */
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

/* malloc(9) type used for RAIDframe's kernel allocations. */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);

/* autoconf(9) glue. */
void raidattach(int);
static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

/* Raw reads/writes of the on-component reserved areas (labels, maps). */
static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);


/* Device switch entry points (see raid_bdevsw/raid_cdevsw below). */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);
206
/* Block-device switch: entry points for the raid block device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_flag = D_DISK
};
216
/* Character-device switch: raw (rraid) entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK
};
230
/* disk(9) driver glue: strategy routine and transfer-size clamp. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
232
/*
 * Per-unit software state for a RAID set.  One of these exists for every
 * raid unit that has ever been referenced; they are kept on the global
 * "raids" list and looked up by unit number (see raidget()).
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags (RAIDF_*, below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the raid unit number from a dev_t. */
#define	raidunit(x)	DISKUNIT(x)

extern struct cfdriver raid_cd;
/* Pseudo-device attachment; allow detach at shutdown. */
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);
259
260 /*
261 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
262 * Be aware that large numbers can allow the driver to consume a lot of
263 * kernel memory, especially on writes, and in degraded mode reads.
264 *
265 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
266 * a single 64K write will typically require 64K for the old data,
267 * 64K for the old parity, and 64K for the new parity, for a total
268 * of 192K (if the parity buffer is not re-used immediately).
269 * Even it if is used immediately, that's still 128K, which when multiplied
270 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
271 *
272 * Now in degraded mode, for example, a 64K read on the above setup may
273 * require data reconstruction, which will require *all* of the 4 remaining
274 * disks to participate -- 4 * 32K/disk == 128K again.
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 6
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
/* declared here, and made public, for the benefit of KVM stuff.. */

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* Per-unit sleep lock (see RAIDF_LOCKED/RAIDF_WANTED). */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

/* Kernel-thread entry points for long-running maintenance operations. */
void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

/* Autoconfiguration helpers: component discovery and set assembly. */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;	/* autoconfig runs exactly once */

struct RF_Pools_s rf_pools;

/* Global list of all raid_softc's, protected by raid_lock. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;
335
336 static struct raid_softc *
337 raidcreate(int unit) {
338 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
339 if (sc == NULL) {
340 #ifdef DIAGNOSTIC
341 printf("%s: out of memory\n", __func__);
342 #endif
343 return NULL;
344 }
345 sc->sc_unit = unit;
346 bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
347 return sc;
348 }
349
/* Release a softc allocated by raidcreate(); caller must have unlinked it. */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
355
356 static struct raid_softc *
357 raidget(int unit) {
358 struct raid_softc *sc;
359 if (unit < 0) {
360 #ifdef DIAGNOSTIC
361 panic("%s: unit %d!", __func__, unit);
362 #endif
363 return NULL;
364 }
365 mutex_enter(&raid_lock);
366 LIST_FOREACH(sc, &raids, sc_link) {
367 if (sc->sc_unit == unit) {
368 mutex_exit(&raid_lock);
369 return sc;
370 }
371 }
372 mutex_exit(&raid_lock);
373 if ((sc = raidcreate(unit)) == NULL)
374 return NULL;
375 mutex_enter(&raid_lock);
376 LIST_INSERT_HEAD(&raids, sc, sc_link);
377 mutex_exit(&raid_lock);
378 return sc;
379 }
380
/* Unlink a softc from the global list and free it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
388
/*
 * One-time driver initialization, called from the pseudo-device attach
 * path.  Sets up global locks, boots the RAIDframe core, registers the
 * autoconf attachment, and schedules autoconfiguration of RAID sets to
 * run after all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
421
/*
 * config_finalize(9) callback: locate RAID components on all disks,
 * group them into sets, and configure the valid ones.  Runs at most
 * once; returns 0 when it did nothing, 1 when it (re)ran configuration.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
459
460 static int
461 rf_containsboot(RF_Raid_t *r, device_t bdv) {
462 const char *bootname = device_xname(bdv);
463 size_t len = strlen(bootname);
464
465 for (int col = 0; col < r->numCol; col++) {
466 const char *devname = r->Disks[col].devname;
467 devname += sizeof("/dev/") - 1;
468 if (strncmp(devname, "dk", 2) == 0) {
469 const char *parent =
470 dkwedge_get_parent_name(r->Disks[col].dev);
471 if (parent != NULL)
472 devname = parent;
473 }
474 if (strncmp(devname, bootname, len) == 0) {
475 struct raid_softc *sc = r->softc;
476 aprint_debug("raid%d includes boot device %s\n",
477 sc->sc_unit, devname);
478 return 1;
479 }
480 }
481 return 0;
482 }
483
/*
 * Walk the list of discovered config sets, configure those that have
 * enough components and are marked for autoconfiguration, and then try
 * to pick a root device among the rootable sets:
 *
 *  - exactly one rootable set: point booted_device at it (or at its
 *    first wedge), unless the kernel already booted from a device that
 *    is not part of that set;
 *  - more than one: fall back to the set containing booted_device, and
 *    if that is still ambiguous, ask the user (RB_ASKNAME).
 *
 * Consumes the config_sets list (each set is cleaned up as it is
 * processed).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition?
			 * For now, assume a wedge named like "raidNa".
			 */
			char cname[sizeof(cset->ac->devname)];
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
#ifndef RAIDFRAME_FORCE_ROOT
		/* Only override booted_device if it is unknown or lives
		 * inside this set (i.e. we booted from a component). */
		if (booted_device == NULL
		    || rf_containsboot(&rsc->sc_r, booted_device))
#endif
			booted_device = candidate_root;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only valid root-capable sets that
		 * actually contain the device we booted from. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
581
582
583 int
584 raidsize(dev_t dev)
585 {
586 struct raid_softc *rs;
587 struct disklabel *lp;
588 int part, unit, omask, size;
589
590 unit = raidunit(dev);
591 if ((rs = raidget(unit)) == NULL)
592 return -1;
593 if ((rs->sc_flags & RAIDF_INITED) == 0)
594 return (-1);
595
596 part = DISKPART(dev);
597 omask = rs->sc_dkdev.dk_openmask & (1 << part);
598 lp = rs->sc_dkdev.dk_label;
599
600 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
601 return (-1);
602
603 if (lp->d_partitions[part].p_fstype != FS_SWAP)
604 size = -1;
605 else
606 size = lp->d_partitions[part].p_size *
607 (lp->d_secsize / DEV_BSIZE);
608
609 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
610 return (-1);
611
612 return (size);
613
614 }
615
/*
 * d_dump entry point: write a crash dump through the raid device.
 * Only RAID 1 sets (1 data + 1 parity column) are supported; the dump
 * is written directly to one surviving component, chosen in preference
 * order below, bypassing the normal RAIDframe I/O path.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dump request must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Reject dumps that would run past the end of the raid device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * d_open entry point.  Validates the partition, reads the disklabel on
 * first open, records the open in the per-format open masks (which also
 * prevents unconfiguration while open), and marks components dirty on
 * the first open of a configured set.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens on a unit that is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * d_close entry point.  Clears the partition from the open masks; on
 * the last close of a configured set, pushes a final component-label
 * update so the components are marked clean.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
885
/*
 * d_strategy entry point.  Validates the request and bounds-checks it
 * against either the raw device size or the disklabel, then enqueues
 * the buf on the per-unit queue and wakes the worker that services it
 * (via iodone_cv; presumably the RAIDframe I/O thread -- see the queue
 * consumer elsewhere in this file).  Errors complete the buf here via
 * biodone().
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int     wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (component-sector units) into
		 * DEV_BSIZE units, shifting in whichever direction the
		 * sector size requires. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
956 /* ARGSUSED */
957 int
958 raidread(dev_t dev, struct uio *uio, int flags)
959 {
960 int unit = raidunit(dev);
961 struct raid_softc *rs;
962
963 if ((rs = raidget(unit)) == NULL)
964 return ENXIO;
965
966 if ((rs->sc_flags & RAIDF_INITED) == 0)
967 return (ENXIO);
968
969 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
970
971 }
972 /* ARGSUSED */
973 int
974 raidwrite(dev_t dev, struct uio *uio, int flags)
975 {
976 int unit = raidunit(dev);
977 struct raid_softc *rs;
978
979 if ((rs = raidget(unit)) == NULL)
980 return ENXIO;
981
982 if ((rs->sc_flags & RAIDF_INITED) == 0)
983 return (ENXIO);
984
985 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
986
987 }
988
/*
 * Tear down a raid unit.  Caller must hold the unit lock (raidlock).
 * Fails with EBUSY while any partition is open; otherwise shuts the
 * RAIDframe array down (if it was configured) and detaches the disk(9)
 * structures.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1020
1021 int
1022 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1023 {
1024 int unit = raidunit(dev);
1025 int error = 0;
1026 int part, pmask, s;
1027 cfdata_t cf;
1028 struct raid_softc *rs;
1029 RF_Config_t *k_cfg, *u_cfg;
1030 RF_Raid_t *raidPtr;
1031 RF_RaidDisk_t *diskPtr;
1032 RF_AccTotals_t *totals;
1033 RF_DeviceConfig_t *d_cfg, **ucfgp;
1034 u_char *specific_buf;
1035 int retcode = 0;
1036 int column;
1037 /* int raidid; */
1038 struct rf_recon_req *rrcopy, *rr;
1039 RF_ComponentLabel_t *clabel;
1040 RF_ComponentLabel_t *ci_label;
1041 RF_ComponentLabel_t **clabel_ptr;
1042 RF_SingleComponent_t *sparePtr,*componentPtr;
1043 RF_SingleComponent_t component;
1044 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1045 int i, j, d;
1046 #ifdef __HAVE_OLD_DISKLABEL
1047 struct disklabel newlabel;
1048 #endif
1049 struct dkwedge_info *dkw;
1050
1051 if ((rs = raidget(unit)) == NULL)
1052 return ENXIO;
1053 raidPtr = &rs->sc_r;
1054
1055 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1056 (int) DISKPART(dev), (int) unit, cmd));
1057
1058 /* Must be open for writes for these commands... */
1059 switch (cmd) {
1060 #ifdef DIOCGSECTORSIZE
1061 case DIOCGSECTORSIZE:
1062 *(u_int *)data = raidPtr->bytesPerSector;
1063 return 0;
1064 case DIOCGMEDIASIZE:
1065 *(off_t *)data =
1066 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1067 return 0;
1068 #endif
1069 case DIOCSDINFO:
1070 case DIOCWDINFO:
1071 #ifdef __HAVE_OLD_DISKLABEL
1072 case ODIOCWDINFO:
1073 case ODIOCSDINFO:
1074 #endif
1075 case DIOCWLABEL:
1076 case DIOCAWEDGE:
1077 case DIOCDWEDGE:
1078 case DIOCSSTRATEGY:
1079 if ((flag & FWRITE) == 0)
1080 return (EBADF);
1081 }
1082
1083 /* Must be initialized for these... */
1084 switch (cmd) {
1085 case DIOCGDINFO:
1086 case DIOCSDINFO:
1087 case DIOCWDINFO:
1088 #ifdef __HAVE_OLD_DISKLABEL
1089 case ODIOCGDINFO:
1090 case ODIOCWDINFO:
1091 case ODIOCSDINFO:
1092 case ODIOCGDEFLABEL:
1093 #endif
1094 case DIOCGPART:
1095 case DIOCWLABEL:
1096 case DIOCGDEFLABEL:
1097 case DIOCAWEDGE:
1098 case DIOCDWEDGE:
1099 case DIOCLWEDGES:
1100 case DIOCCACHESYNC:
1101 case RAIDFRAME_SHUTDOWN:
1102 case RAIDFRAME_REWRITEPARITY:
1103 case RAIDFRAME_GET_INFO:
1104 case RAIDFRAME_RESET_ACCTOTALS:
1105 case RAIDFRAME_GET_ACCTOTALS:
1106 case RAIDFRAME_KEEP_ACCTOTALS:
1107 case RAIDFRAME_GET_SIZE:
1108 case RAIDFRAME_FAIL_DISK:
1109 case RAIDFRAME_COPYBACK:
1110 case RAIDFRAME_CHECK_RECON_STATUS:
1111 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1112 case RAIDFRAME_GET_COMPONENT_LABEL:
1113 case RAIDFRAME_SET_COMPONENT_LABEL:
1114 case RAIDFRAME_ADD_HOT_SPARE:
1115 case RAIDFRAME_REMOVE_HOT_SPARE:
1116 case RAIDFRAME_INIT_LABELS:
1117 case RAIDFRAME_REBUILD_IN_PLACE:
1118 case RAIDFRAME_CHECK_PARITY:
1119 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1120 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1121 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1122 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1123 case RAIDFRAME_SET_AUTOCONFIG:
1124 case RAIDFRAME_SET_ROOT:
1125 case RAIDFRAME_DELETE_COMPONENT:
1126 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1127 case RAIDFRAME_PARITYMAP_STATUS:
1128 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1129 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1130 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1131 case DIOCGSTRATEGY:
1132 case DIOCSSTRATEGY:
1133 if ((rs->sc_flags & RAIDF_INITED) == 0)
1134 return (ENXIO);
1135 }
1136
1137 switch (cmd) {
1138 #ifdef COMPAT_50
1139 case RAIDFRAME_GET_INFO50:
1140 return rf_get_info50(raidPtr, data);
1141
1142 case RAIDFRAME_CONFIGURE50:
1143 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1144 return retcode;
1145 goto config;
1146 #endif
1147 /* configure the system */
1148 case RAIDFRAME_CONFIGURE:
1149
1150 if (raidPtr->valid) {
1151 /* There is a valid RAID set running on this unit! */
1152 printf("raid%d: Device already configured!\n",unit);
1153 return(EINVAL);
1154 }
1155
1156 /* copy-in the configuration information */
1157 /* data points to a pointer to the configuration structure */
1158
1159 u_cfg = *((RF_Config_t **) data);
1160 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1161 if (k_cfg == NULL) {
1162 return (ENOMEM);
1163 }
1164 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1165 if (retcode) {
1166 RF_Free(k_cfg, sizeof(RF_Config_t));
1167 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1168 retcode));
1169 return (retcode);
1170 }
1171 goto config;
1172 config:
1173 /* allocate a buffer for the layout-specific data, and copy it
1174 * in */
1175 if (k_cfg->layoutSpecificSize) {
1176 if (k_cfg->layoutSpecificSize > 10000) {
1177 /* sanity check */
1178 RF_Free(k_cfg, sizeof(RF_Config_t));
1179 return (EINVAL);
1180 }
1181 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1182 (u_char *));
1183 if (specific_buf == NULL) {
1184 RF_Free(k_cfg, sizeof(RF_Config_t));
1185 return (ENOMEM);
1186 }
1187 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1188 k_cfg->layoutSpecificSize);
1189 if (retcode) {
1190 RF_Free(k_cfg, sizeof(RF_Config_t));
1191 RF_Free(specific_buf,
1192 k_cfg->layoutSpecificSize);
1193 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1194 retcode));
1195 return (retcode);
1196 }
1197 } else
1198 specific_buf = NULL;
1199 k_cfg->layoutSpecific = specific_buf;
1200
1201 /* should do some kind of sanity check on the configuration.
1202 * Store the sum of all the bytes in the last byte? */
1203
1204 /* configure the system */
1205
1206 /*
1207 * Clear the entire RAID descriptor, just to make sure
1208 * there is no stale data left in the case of a
1209 * reconfiguration
1210 */
1211 memset(raidPtr, 0, sizeof(*raidPtr));
1212 raidPtr->softc = rs;
1213 raidPtr->raidid = unit;
1214
1215 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1216
1217 if (retcode == 0) {
1218
1219 /* allow this many simultaneous IO's to
1220 this RAID device */
1221 raidPtr->openings = RAIDOUTSTANDING;
1222
1223 raidinit(rs);
1224 rf_markalldirty(raidPtr);
1225 }
1226 /* free the buffers. No return code here. */
1227 if (k_cfg->layoutSpecificSize) {
1228 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1229 }
1230 RF_Free(k_cfg, sizeof(RF_Config_t));
1231
1232 return (retcode);
1233
1234 /* shutdown the system */
1235 case RAIDFRAME_SHUTDOWN:
1236
1237 part = DISKPART(dev);
1238 pmask = (1 << part);
1239
1240 if ((error = raidlock(rs)) != 0)
1241 return (error);
1242
1243 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1244 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1245 (rs->sc_dkdev.dk_copenmask & pmask)))
1246 retcode = EBUSY;
1247 else {
1248 rs->sc_flags |= RAIDF_SHUTDOWN;
1249 rs->sc_dkdev.dk_copenmask &= ~pmask;
1250 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1251 rs->sc_dkdev.dk_openmask &= ~pmask;
1252 retcode = 0;
1253 }
1254
1255 raidunlock(rs);
1256
1257 if (retcode != 0)
1258 return retcode;
1259
1260 /* free the pseudo device attach bits */
1261
1262 cf = device_cfdata(rs->sc_dev);
1263 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1264 free(cf, M_RAIDFRAME);
1265
1266 return (retcode);
1267 case RAIDFRAME_GET_COMPONENT_LABEL:
1268 clabel_ptr = (RF_ComponentLabel_t **) data;
1269 /* need to read the component label for the disk indicated
1270 by row,column in clabel */
1271
1272 /*
1273 * Perhaps there should be an option to skip the in-core
1274 * copy and hit the disk, as with disklabel(8).
1275 */
1276 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1277
1278 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1279
1280 if (retcode) {
1281 RF_Free(clabel, sizeof(*clabel));
1282 return retcode;
1283 }
1284
1285 clabel->row = 0; /* Don't allow looking at anything else.*/
1286
1287 column = clabel->column;
1288
1289 if ((column < 0) || (column >= raidPtr->numCol +
1290 raidPtr->numSpare)) {
1291 RF_Free(clabel, sizeof(*clabel));
1292 return EINVAL;
1293 }
1294
1295 RF_Free(clabel, sizeof(*clabel));
1296
1297 clabel = raidget_component_label(raidPtr, column);
1298
1299 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1300
1301 #if 0
1302 case RAIDFRAME_SET_COMPONENT_LABEL:
1303 clabel = (RF_ComponentLabel_t *) data;
1304
1305 /* XXX check the label for valid stuff... */
1306 /* Note that some things *should not* get modified --
1307 the user should be re-initing the labels instead of
1308 trying to patch things.
1309 */
1310
1311 raidid = raidPtr->raidid;
1312 #ifdef DEBUG
1313 printf("raid%d: Got component label:\n", raidid);
1314 printf("raid%d: Version: %d\n", raidid, clabel->version);
1315 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1316 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1317 printf("raid%d: Column: %d\n", raidid, clabel->column);
1318 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1319 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1320 printf("raid%d: Status: %d\n", raidid, clabel->status);
1321 #endif
1322 clabel->row = 0;
1323 column = clabel->column;
1324
1325 if ((column < 0) || (column >= raidPtr->numCol)) {
1326 return(EINVAL);
1327 }
1328
1329 /* XXX this isn't allowed to do anything for now :-) */
1330
1331 /* XXX and before it is, we need to fill in the rest
1332 of the fields!?!?!?! */
1333 memcpy(raidget_component_label(raidPtr, column),
1334 clabel, sizeof(*clabel));
1335 raidflush_component_label(raidPtr, column);
1336 return (0);
1337 #endif
1338
1339 case RAIDFRAME_INIT_LABELS:
1340 clabel = (RF_ComponentLabel_t *) data;
1341 /*
1342 we only want the serial number from
1343 the above. We get all the rest of the information
1344 from the config that was used to create this RAID
1345 set.
1346 */
1347
1348 raidPtr->serial_number = clabel->serial_number;
1349
1350 for(column=0;column<raidPtr->numCol;column++) {
1351 diskPtr = &raidPtr->Disks[column];
1352 if (!RF_DEAD_DISK(diskPtr->status)) {
1353 ci_label = raidget_component_label(raidPtr,
1354 column);
1355 /* Zeroing this is important. */
1356 memset(ci_label, 0, sizeof(*ci_label));
1357 raid_init_component_label(raidPtr, ci_label);
1358 ci_label->serial_number =
1359 raidPtr->serial_number;
1360 ci_label->row = 0; /* we dont' pretend to support more */
1361 rf_component_label_set_partitionsize(ci_label,
1362 diskPtr->partitionSize);
1363 ci_label->column = column;
1364 raidflush_component_label(raidPtr, column);
1365 }
1366 /* XXXjld what about the spares? */
1367 }
1368
1369 return (retcode);
1370 case RAIDFRAME_SET_AUTOCONFIG:
1371 d = rf_set_autoconfig(raidPtr, *(int *) data);
1372 printf("raid%d: New autoconfig value is: %d\n",
1373 raidPtr->raidid, d);
1374 *(int *) data = d;
1375 return (retcode);
1376
1377 case RAIDFRAME_SET_ROOT:
1378 d = rf_set_rootpartition(raidPtr, *(int *) data);
1379 printf("raid%d: New rootpartition value is: %d\n",
1380 raidPtr->raidid, d);
1381 *(int *) data = d;
1382 return (retcode);
1383
1384 /* initialize all parity */
1385 case RAIDFRAME_REWRITEPARITY:
1386
1387 if (raidPtr->Layout.map->faultsTolerated == 0) {
1388 /* Parity for RAID 0 is trivially correct */
1389 raidPtr->parity_good = RF_RAID_CLEAN;
1390 return(0);
1391 }
1392
1393 if (raidPtr->parity_rewrite_in_progress == 1) {
1394 /* Re-write is already in progress! */
1395 return(EINVAL);
1396 }
1397
1398 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1399 rf_RewriteParityThread,
1400 raidPtr,"raid_parity");
1401 return (retcode);
1402
1403
1404 case RAIDFRAME_ADD_HOT_SPARE:
1405 sparePtr = (RF_SingleComponent_t *) data;
1406 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1407 retcode = rf_add_hot_spare(raidPtr, &component);
1408 return(retcode);
1409
1410 case RAIDFRAME_REMOVE_HOT_SPARE:
1411 return(retcode);
1412
1413 case RAIDFRAME_DELETE_COMPONENT:
1414 componentPtr = (RF_SingleComponent_t *)data;
1415 memcpy( &component, componentPtr,
1416 sizeof(RF_SingleComponent_t));
1417 retcode = rf_delete_component(raidPtr, &component);
1418 return(retcode);
1419
1420 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1421 componentPtr = (RF_SingleComponent_t *)data;
1422 memcpy( &component, componentPtr,
1423 sizeof(RF_SingleComponent_t));
1424 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1425 return(retcode);
1426
1427 case RAIDFRAME_REBUILD_IN_PLACE:
1428
1429 if (raidPtr->Layout.map->faultsTolerated == 0) {
1430 /* Can't do this on a RAID 0!! */
1431 return(EINVAL);
1432 }
1433
1434 if (raidPtr->recon_in_progress == 1) {
1435 /* a reconstruct is already in progress! */
1436 return(EINVAL);
1437 }
1438
1439 componentPtr = (RF_SingleComponent_t *) data;
1440 memcpy( &component, componentPtr,
1441 sizeof(RF_SingleComponent_t));
1442 component.row = 0; /* we don't support any more */
1443 column = component.column;
1444
1445 if ((column < 0) || (column >= raidPtr->numCol)) {
1446 return(EINVAL);
1447 }
1448
1449 rf_lock_mutex2(raidPtr->mutex);
1450 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1451 (raidPtr->numFailures > 0)) {
1452 /* XXX 0 above shouldn't be constant!!! */
1453 /* some component other than this has failed.
1454 Let's not make things worse than they already
1455 are... */
1456 printf("raid%d: Unable to reconstruct to disk at:\n",
1457 raidPtr->raidid);
1458 printf("raid%d: Col: %d Too many failures.\n",
1459 raidPtr->raidid, column);
1460 rf_unlock_mutex2(raidPtr->mutex);
1461 return (EINVAL);
1462 }
1463 if (raidPtr->Disks[column].status ==
1464 rf_ds_reconstructing) {
1465 printf("raid%d: Unable to reconstruct to disk at:\n",
1466 raidPtr->raidid);
1467 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1468
1469 rf_unlock_mutex2(raidPtr->mutex);
1470 return (EINVAL);
1471 }
1472 if (raidPtr->Disks[column].status == rf_ds_spared) {
1473 rf_unlock_mutex2(raidPtr->mutex);
1474 return (EINVAL);
1475 }
1476 rf_unlock_mutex2(raidPtr->mutex);
1477
1478 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1479 if (rrcopy == NULL)
1480 return(ENOMEM);
1481
1482 rrcopy->raidPtr = (void *) raidPtr;
1483 rrcopy->col = column;
1484
1485 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1486 rf_ReconstructInPlaceThread,
1487 rrcopy,"raid_reconip");
1488 return(retcode);
1489
1490 case RAIDFRAME_GET_INFO:
1491 if (!raidPtr->valid)
1492 return (ENODEV);
1493 ucfgp = (RF_DeviceConfig_t **) data;
1494 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1495 (RF_DeviceConfig_t *));
1496 if (d_cfg == NULL)
1497 return (ENOMEM);
1498 d_cfg->rows = 1; /* there is only 1 row now */
1499 d_cfg->cols = raidPtr->numCol;
1500 d_cfg->ndevs = raidPtr->numCol;
1501 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1502 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1503 return (ENOMEM);
1504 }
1505 d_cfg->nspares = raidPtr->numSpare;
1506 if (d_cfg->nspares >= RF_MAX_DISKS) {
1507 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1508 return (ENOMEM);
1509 }
1510 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1511 d = 0;
1512 for (j = 0; j < d_cfg->cols; j++) {
1513 d_cfg->devs[d] = raidPtr->Disks[j];
1514 d++;
1515 }
1516 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1517 d_cfg->spares[i] = raidPtr->Disks[j];
1518 }
1519 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1520 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1521
1522 return (retcode);
1523
1524 case RAIDFRAME_CHECK_PARITY:
1525 *(int *) data = raidPtr->parity_good;
1526 return (0);
1527
1528 case RAIDFRAME_PARITYMAP_STATUS:
1529 if (rf_paritymap_ineligible(raidPtr))
1530 return EINVAL;
1531 rf_paritymap_status(raidPtr->parity_map,
1532 (struct rf_pmstat *)data);
1533 return 0;
1534
1535 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1536 if (rf_paritymap_ineligible(raidPtr))
1537 return EINVAL;
1538 if (raidPtr->parity_map == NULL)
1539 return ENOENT; /* ??? */
1540 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1541 (struct rf_pmparams *)data, 1))
1542 return EINVAL;
1543 return 0;
1544
1545 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1546 if (rf_paritymap_ineligible(raidPtr))
1547 return EINVAL;
1548 *(int *) data = rf_paritymap_get_disable(raidPtr);
1549 return 0;
1550
1551 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1552 if (rf_paritymap_ineligible(raidPtr))
1553 return EINVAL;
1554 rf_paritymap_set_disable(raidPtr, *(int *)data);
1555 /* XXX should errors be passed up? */
1556 return 0;
1557
1558 case RAIDFRAME_RESET_ACCTOTALS:
1559 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1560 return (0);
1561
1562 case RAIDFRAME_GET_ACCTOTALS:
1563 totals = (RF_AccTotals_t *) data;
1564 *totals = raidPtr->acc_totals;
1565 return (0);
1566
1567 case RAIDFRAME_KEEP_ACCTOTALS:
1568 raidPtr->keep_acc_totals = *(int *)data;
1569 return (0);
1570
1571 case RAIDFRAME_GET_SIZE:
1572 *(int *) data = raidPtr->totalSectors;
1573 return (0);
1574
1575 /* fail a disk & optionally start reconstruction */
1576 case RAIDFRAME_FAIL_DISK:
1577
1578 if (raidPtr->Layout.map->faultsTolerated == 0) {
1579 /* Can't do this on a RAID 0!! */
1580 return(EINVAL);
1581 }
1582
1583 rr = (struct rf_recon_req *) data;
1584 rr->row = 0;
1585 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1586 return (EINVAL);
1587
1588
1589 rf_lock_mutex2(raidPtr->mutex);
1590 if (raidPtr->status == rf_rs_reconstructing) {
1591 /* you can't fail a disk while we're reconstructing! */
1592 /* XXX wrong for RAID6 */
1593 rf_unlock_mutex2(raidPtr->mutex);
1594 return (EINVAL);
1595 }
1596 if ((raidPtr->Disks[rr->col].status ==
1597 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1598 /* some other component has failed. Let's not make
1599 things worse. XXX wrong for RAID6 */
1600 rf_unlock_mutex2(raidPtr->mutex);
1601 return (EINVAL);
1602 }
1603 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1604 /* Can't fail a spared disk! */
1605 rf_unlock_mutex2(raidPtr->mutex);
1606 return (EINVAL);
1607 }
1608 rf_unlock_mutex2(raidPtr->mutex);
1609
1610 /* make a copy of the recon request so that we don't rely on
1611 * the user's buffer */
1612 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1613 if (rrcopy == NULL)
1614 return(ENOMEM);
1615 memcpy(rrcopy, rr, sizeof(*rr));
1616 rrcopy->raidPtr = (void *) raidPtr;
1617
1618 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1619 rf_ReconThread,
1620 rrcopy,"raid_recon");
1621 return (0);
1622
1623 /* invoke a copyback operation after recon on whatever disk
1624 * needs it, if any */
1625 case RAIDFRAME_COPYBACK:
1626
1627 if (raidPtr->Layout.map->faultsTolerated == 0) {
1628 /* This makes no sense on a RAID 0!! */
1629 return(EINVAL);
1630 }
1631
1632 if (raidPtr->copyback_in_progress == 1) {
1633 /* Copyback is already in progress! */
1634 return(EINVAL);
1635 }
1636
1637 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1638 rf_CopybackThread,
1639 raidPtr,"raid_copyback");
1640 return (retcode);
1641
1642 /* return the percentage completion of reconstruction */
1643 case RAIDFRAME_CHECK_RECON_STATUS:
1644 if (raidPtr->Layout.map->faultsTolerated == 0) {
1645 /* This makes no sense on a RAID 0, so tell the
1646 user it's done. */
1647 *(int *) data = 100;
1648 return(0);
1649 }
1650 if (raidPtr->status != rf_rs_reconstructing)
1651 *(int *) data = 100;
1652 else {
1653 if (raidPtr->reconControl->numRUsTotal > 0) {
1654 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1655 } else {
1656 *(int *) data = 0;
1657 }
1658 }
1659 return (0);
1660 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1661 progressInfoPtr = (RF_ProgressInfo_t **) data;
1662 if (raidPtr->status != rf_rs_reconstructing) {
1663 progressInfo.remaining = 0;
1664 progressInfo.completed = 100;
1665 progressInfo.total = 100;
1666 } else {
1667 progressInfo.total =
1668 raidPtr->reconControl->numRUsTotal;
1669 progressInfo.completed =
1670 raidPtr->reconControl->numRUsComplete;
1671 progressInfo.remaining = progressInfo.total -
1672 progressInfo.completed;
1673 }
1674 retcode = copyout(&progressInfo, *progressInfoPtr,
1675 sizeof(RF_ProgressInfo_t));
1676 return (retcode);
1677
1678 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1679 if (raidPtr->Layout.map->faultsTolerated == 0) {
1680 /* This makes no sense on a RAID 0, so tell the
1681 user it's done. */
1682 *(int *) data = 100;
1683 return(0);
1684 }
1685 if (raidPtr->parity_rewrite_in_progress == 1) {
1686 *(int *) data = 100 *
1687 raidPtr->parity_rewrite_stripes_done /
1688 raidPtr->Layout.numStripe;
1689 } else {
1690 *(int *) data = 100;
1691 }
1692 return (0);
1693
1694 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1695 progressInfoPtr = (RF_ProgressInfo_t **) data;
1696 if (raidPtr->parity_rewrite_in_progress == 1) {
1697 progressInfo.total = raidPtr->Layout.numStripe;
1698 progressInfo.completed =
1699 raidPtr->parity_rewrite_stripes_done;
1700 progressInfo.remaining = progressInfo.total -
1701 progressInfo.completed;
1702 } else {
1703 progressInfo.remaining = 0;
1704 progressInfo.completed = 100;
1705 progressInfo.total = 100;
1706 }
1707 retcode = copyout(&progressInfo, *progressInfoPtr,
1708 sizeof(RF_ProgressInfo_t));
1709 return (retcode);
1710
1711 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1712 if (raidPtr->Layout.map->faultsTolerated == 0) {
1713 /* This makes no sense on a RAID 0 */
1714 *(int *) data = 100;
1715 return(0);
1716 }
1717 if (raidPtr->copyback_in_progress == 1) {
1718 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1719 raidPtr->Layout.numStripe;
1720 } else {
1721 *(int *) data = 100;
1722 }
1723 return (0);
1724
1725 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1726 progressInfoPtr = (RF_ProgressInfo_t **) data;
1727 if (raidPtr->copyback_in_progress == 1) {
1728 progressInfo.total = raidPtr->Layout.numStripe;
1729 progressInfo.completed =
1730 raidPtr->copyback_stripes_done;
1731 progressInfo.remaining = progressInfo.total -
1732 progressInfo.completed;
1733 } else {
1734 progressInfo.remaining = 0;
1735 progressInfo.completed = 100;
1736 progressInfo.total = 100;
1737 }
1738 retcode = copyout(&progressInfo, *progressInfoPtr,
1739 sizeof(RF_ProgressInfo_t));
1740 return (retcode);
1741
1742 /* the sparetable daemon calls this to wait for the kernel to
1743 * need a spare table. this ioctl does not return until a
1744 * spare table is needed. XXX -- calling mpsleep here in the
1745 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1746 * -- I should either compute the spare table in the kernel,
1747 * or have a different -- XXX XXX -- interface (a different
1748 * character device) for delivering the table -- XXX */
1749 #if 0
1750 case RAIDFRAME_SPARET_WAIT:
1751 rf_lock_mutex2(rf_sparet_wait_mutex);
1752 while (!rf_sparet_wait_queue)
1753 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1754 waitreq = rf_sparet_wait_queue;
1755 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1756 rf_unlock_mutex2(rf_sparet_wait_mutex);
1757
1758 /* structure assignment */
1759 *((RF_SparetWait_t *) data) = *waitreq;
1760
1761 RF_Free(waitreq, sizeof(*waitreq));
1762 return (0);
1763
1764 /* wakes up a process waiting on SPARET_WAIT and puts an error
1765 * code in it that will cause the dameon to exit */
1766 case RAIDFRAME_ABORT_SPARET_WAIT:
1767 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1768 waitreq->fcol = -1;
1769 rf_lock_mutex2(rf_sparet_wait_mutex);
1770 waitreq->next = rf_sparet_wait_queue;
1771 rf_sparet_wait_queue = waitreq;
1772 rf_broadcast_conf2(rf_sparet_wait_cv);
1773 rf_unlock_mutex2(rf_sparet_wait_mutex);
1774 return (0);
1775
1776 /* used by the spare table daemon to deliver a spare table
1777 * into the kernel */
1778 case RAIDFRAME_SEND_SPARET:
1779
1780 /* install the spare table */
1781 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1782
1783 /* respond to the requestor. the return status of the spare
1784 * table installation is passed in the "fcol" field */
1785 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1786 waitreq->fcol = retcode;
1787 rf_lock_mutex2(rf_sparet_wait_mutex);
1788 waitreq->next = rf_sparet_resp_queue;
1789 rf_sparet_resp_queue = waitreq;
1790 rf_broadcast_cond2(rf_sparet_resp_cv);
1791 rf_unlock_mutex2(rf_sparet_wait_mutex);
1792
1793 return (retcode);
1794 #endif
1795
1796 default:
1797 break; /* fall through to the os-specific code below */
1798
1799 }
1800
1801 if (!raidPtr->valid)
1802 return (EINVAL);
1803
1804 /*
1805 * Add support for "regular" device ioctls here.
1806 */
1807
1808 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1809 if (error != EPASSTHROUGH)
1810 return (error);
1811
1812 switch (cmd) {
1813 case DIOCGDINFO:
1814 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1815 break;
1816 #ifdef __HAVE_OLD_DISKLABEL
1817 case ODIOCGDINFO:
1818 newlabel = *(rs->sc_dkdev.dk_label);
1819 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1820 return ENOTTY;
1821 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1822 break;
1823 #endif
1824
1825 case DIOCGPART:
1826 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1827 ((struct partinfo *) data)->part =
1828 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1829 break;
1830
1831 case DIOCWDINFO:
1832 case DIOCSDINFO:
1833 #ifdef __HAVE_OLD_DISKLABEL
1834 case ODIOCWDINFO:
1835 case ODIOCSDINFO:
1836 #endif
1837 {
1838 struct disklabel *lp;
1839 #ifdef __HAVE_OLD_DISKLABEL
1840 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1841 memset(&newlabel, 0, sizeof newlabel);
1842 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1843 lp = &newlabel;
1844 } else
1845 #endif
1846 lp = (struct disklabel *)data;
1847
1848 if ((error = raidlock(rs)) != 0)
1849 return (error);
1850
1851 rs->sc_flags |= RAIDF_LABELLING;
1852
1853 error = setdisklabel(rs->sc_dkdev.dk_label,
1854 lp, 0, rs->sc_dkdev.dk_cpulabel);
1855 if (error == 0) {
1856 if (cmd == DIOCWDINFO
1857 #ifdef __HAVE_OLD_DISKLABEL
1858 || cmd == ODIOCWDINFO
1859 #endif
1860 )
1861 error = writedisklabel(RAIDLABELDEV(dev),
1862 raidstrategy, rs->sc_dkdev.dk_label,
1863 rs->sc_dkdev.dk_cpulabel);
1864 }
1865 rs->sc_flags &= ~RAIDF_LABELLING;
1866
1867 raidunlock(rs);
1868
1869 if (error)
1870 return (error);
1871 break;
1872 }
1873
1874 case DIOCWLABEL:
1875 if (*(int *) data != 0)
1876 rs->sc_flags |= RAIDF_WLABEL;
1877 else
1878 rs->sc_flags &= ~RAIDF_WLABEL;
1879 break;
1880
1881 case DIOCGDEFLABEL:
1882 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1883 break;
1884
1885 #ifdef __HAVE_OLD_DISKLABEL
1886 case ODIOCGDEFLABEL:
1887 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1888 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1889 return ENOTTY;
1890 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1891 break;
1892 #endif
1893
1894 case DIOCAWEDGE:
1895 case DIOCDWEDGE:
1896 dkw = (void *)data;
1897
1898 /* If the ioctl happens here, the parent is us. */
1899 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1900 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1901
1902 case DIOCLWEDGES:
1903 return dkwedge_list(&rs->sc_dkdev,
1904 (struct dkwedge_list *)data, l);
1905 case DIOCCACHESYNC:
1906 return rf_sync_component_caches(raidPtr);
1907
1908 case DIOCGSTRATEGY:
1909 {
1910 struct disk_strategy *dks = (void *)data;
1911
1912 s = splbio();
1913 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1914 sizeof(dks->dks_name));
1915 splx(s);
1916 dks->dks_paramlen = 0;
1917
1918 return 0;
1919 }
1920
1921 case DIOCSSTRATEGY:
1922 {
1923 struct disk_strategy *dks = (void *)data;
1924 struct bufq_state *new;
1925 struct bufq_state *old;
1926
1927 if (dks->dks_param != NULL) {
1928 return EINVAL;
1929 }
1930 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1931 error = bufq_alloc(&new, dks->dks_name,
1932 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1933 if (error) {
1934 return error;
1935 }
1936 s = splbio();
1937 old = rs->buf_queue;
1938 bufq_move(new, old);
1939 rs->buf_queue = new;
1940 splx(s);
1941 bufq_free(old);
1942
1943 return 0;
1944 }
1945
1946 default:
1947 retcode = ENOTTY;
1948 }
1949 return (retcode);
1950
1951 }
1952
1953
1954 /* raidinit -- complete the rest of the initialization for the
1955 RAIDframe device. */
1956
1957
1958 static void
1959 raidinit(struct raid_softc *rs)
1960 {
1961 cfdata_t cf;
1962 int unit;
1963 RF_Raid_t *raidPtr = &rs->sc_r;
1964
1965 unit = raidPtr->raidid;
1966
1967
1968 /* XXX should check return code first... */
1969 rs->sc_flags |= RAIDF_INITED;
1970
1971 /* XXX doesn't check bounds. */
1972 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1973
1974 /* attach the pseudo device */
1975 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1976 cf->cf_name = raid_cd.cd_name;
1977 cf->cf_atname = raid_cd.cd_name;
1978 cf->cf_unit = unit;
1979 cf->cf_fstate = FSTATE_STAR;
1980
1981 rs->sc_dev = config_attach_pseudo(cf);
1982
1983 if (rs->sc_dev == NULL) {
1984 printf("raid%d: config_attach_pseudo failed\n",
1985 raidPtr->raidid);
1986 rs->sc_flags &= ~RAIDF_INITED;
1987 free(cf, M_RAIDFRAME);
1988 return;
1989 }
1990
1991 /* disk_attach actually creates space for the CPU disklabel, among
1992 * other things, so it's critical to call this *BEFORE* we try putzing
1993 * with disklabels. */
1994
1995 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1996 disk_attach(&rs->sc_dkdev);
1997 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1998
1999 /* XXX There may be a weird interaction here between this, and
2000 * protectedSectors, as used in RAIDframe. */
2001
2002 rs->sc_size = raidPtr->totalSectors;
2003
2004 dkwedge_discover(&rs->sc_dkdev);
2005
2006 rf_set_geometry(rs, raidPtr);
2007
2008 }
2009 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2010 /* wake up the daemon & tell it to get us a spare table
2011 * XXX
2012 * the entries in the queues should be tagged with the raidPtr
2013 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2015 * XXX
2016 *
2017 * XXX This code is not currently used. GO
2018 */
2019 int
2020 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2021 {
2022 int retcode;
2023
2024 rf_lock_mutex2(rf_sparet_wait_mutex);
2025 req->next = rf_sparet_wait_queue;
2026 rf_sparet_wait_queue = req;
2027 rf_broadcast_cond2(rf_sparet_wait_cv);
2028
2029 /* mpsleep unlocks the mutex */
2030 while (!rf_sparet_resp_queue) {
2031 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2032 }
2033 req = rf_sparet_resp_queue;
2034 rf_sparet_resp_queue = req->next;
2035 rf_unlock_mutex2(rf_sparet_wait_mutex);
2036
2037 retcode = req->fcol;
2038 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2039 * alloc'd */
2040 return (retcode);
2041 }
2042 #endif
2043
2044 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2045 * bp & passes it down.
2046 * any calls originating in the kernel must use non-blocking I/O
2047 * do some extra sanity checking to return "appropriate" error values for
2048 * certain conditions (to make some standard utilities work)
2049 *
2050 * Formerly known as: rf_DoAccessKernel
2051 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* The label update is done without holding the RAID mutex;
		 * drop it around the call and retake it before adjusting
		 * the failure count. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Lock discipline: raidPtr->mutex is held at the top of every
	 * iteration (and on the 'continue' paths below) and is dropped
	 * while a buffer is being processed. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sector units. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector in the count. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): the "1 ||" makes this unconditional;
		 * db1_printf is presumably a no-op in non-debug kernels --
		 * confirm before removing. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run past the end of the RAID set, or
		 * whose sector arithmetic wrapped around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Transfers must be a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this in-flight access. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* Begin disk-busy (iostat) accounting for this transfer. */
		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* NOTE(review): the opening consumed above is not
			 * visibly returned on this error path -- verify the
			 * completion path accounts for it. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2169
2170
2171
2172
/*
 * rf_DispatchKernelIO -- invoke an I/O from kernel mode.
 *
 * Dispatch a single low-level request `req' against the component
 * backing `queue'.  A NOP request is completed immediately through the
 * normal completion callback; reads and writes are pushed down to the
 * component's block device via bdev_strategy().  The disk queue should
 * be locked upon entry; the queue mutex is dropped around
 * bdev_strategy() because that call can block.  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Route the NOP through the standard completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp for the physical I/O on the component;
		 * KernelWakeupFunc will run at biodone() time. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at biodone() time for a component I/O issued by
   rf_DispatchKernelIO().  If the I/O failed, the component is marked
   failed -- but only once, and only if losing it would not exceed the
   fault tolerance of the set.  The request is then placed on the
   raidPtr->iodone queue and the raidio thread is signalled.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2316
2317
2318 /*
2319 * initialize a buf structure for doing an I/O in the kernel.
2320 */
2321 static void
2322 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2323 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2324 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2325 struct proc *b_proc)
2326 {
2327 /* bp->b_flags = B_PHYS | rw_flag; */
2328 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2329 bp->b_oflags = 0;
2330 bp->b_cflags = 0;
2331 bp->b_bcount = numSect << logBytesPerSector;
2332 bp->b_bufsize = bp->b_bcount;
2333 bp->b_error = 0;
2334 bp->b_dev = dev;
2335 bp->b_data = bf;
2336 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2337 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2338 if (bp->b_bcount == 0) {
2339 panic("bp->b_bcount is zero in InitBP!!");
2340 }
2341 bp->b_proc = b_proc;
2342 bp->b_iodone = cbFunc;
2343 bp->b_private = cbArg;
2344 }
2345
2346 static void
2347 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2348 struct disklabel *lp)
2349 {
2350 memset(lp, 0, sizeof(*lp));
2351
2352 /* fabricate a label... */
2353 lp->d_secperunit = raidPtr->totalSectors;
2354 lp->d_secsize = raidPtr->bytesPerSector;
2355 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2356 lp->d_ntracks = 4 * raidPtr->numCol;
2357 lp->d_ncylinders = raidPtr->totalSectors /
2358 (lp->d_nsectors * lp->d_ntracks);
2359 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2360
2361 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2362 lp->d_type = DTYPE_RAID;
2363 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2364 lp->d_rpm = 3600;
2365 lp->d_interleave = 1;
2366 lp->d_flags = 0;
2367
2368 lp->d_partitions[RAW_PART].p_offset = 0;
2369 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2370 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2371 lp->d_npartitions = RAW_PART + 1;
2372
2373 lp->d_magic = DISKMAGIC;
2374 lp->d_magic2 = DISKMAGIC;
2375 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2376
2377 }
2378 /*
2379 * Read the disklabel from the raid device. If one is not present, fake one
2380 * up.
2381 */
2382 static void
2383 raidgetdisklabel(dev_t dev)
2384 {
2385 int unit = raidunit(dev);
2386 struct raid_softc *rs;
2387 const char *errstring;
2388 struct disklabel *lp;
2389 struct cpu_disklabel *clp;
2390 RF_Raid_t *raidPtr;
2391
2392 if ((rs = raidget(unit)) == NULL)
2393 return;
2394
2395 lp = rs->sc_dkdev.dk_label;
2396 clp = rs->sc_dkdev.dk_cpulabel;
2397
2398 db1_printf(("Getting the disklabel...\n"));
2399
2400 memset(clp, 0, sizeof(*clp));
2401
2402 raidPtr = &rs->sc_r;
2403
2404 raidgetdefaultlabel(raidPtr, rs, lp);
2405
2406 /*
2407 * Call the generic disklabel extraction routine.
2408 */
2409 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2410 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2411 if (errstring)
2412 raidmakedisklabel(rs);
2413 else {
2414 int i;
2415 struct partition *pp;
2416
2417 /*
2418 * Sanity check whether the found disklabel is valid.
2419 *
2420 * This is necessary since total size of the raid device
2421 * may vary when an interleave is changed even though exactly
2422 * same components are used, and old disklabel may used
2423 * if that is found.
2424 */
2425 if (lp->d_secperunit != rs->sc_size)
2426 printf("raid%d: WARNING: %s: "
2427 "total sector size in disklabel (%" PRIu32 ") != "
2428 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2429 lp->d_secperunit, rs->sc_size);
2430 for (i = 0; i < lp->d_npartitions; i++) {
2431 pp = &lp->d_partitions[i];
2432 if (pp->p_offset + pp->p_size > rs->sc_size)
2433 printf("raid%d: WARNING: %s: end of partition `%c' "
2434 "exceeds the size of raid (%" PRIu64 ")\n",
2435 unit, rs->sc_xname, 'a' + i, rs->sc_size);
2436 }
2437 }
2438
2439 }
2440 /*
2441 * Take care of things one might want to take care of in the event
2442 * that a disklabel isn't present.
2443 */
2444 static void
2445 raidmakedisklabel(struct raid_softc *rs)
2446 {
2447 struct disklabel *lp = rs->sc_dkdev.dk_label;
2448 db1_printf(("Making a label..\n"));
2449
2450 /*
2451 * For historical reasons, if there's no disklabel present
2452 * the raw partition must be marked FS_BSDFFS.
2453 */
2454
2455 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2456
2457 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2458
2459 lp->d_checksum = dkcksum(lp);
2460 }
2461 /*
2462 * Wait interruptibly for an exclusive lock.
2463 *
2464 * XXX
2465 * Several drivers do this; it should be abstracted and made MP-safe.
2466 * (Hmm... where have we seen this warning before :-> GO )
2467 */
2468 static int
2469 raidlock(struct raid_softc *rs)
2470 {
2471 int error;
2472
2473 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2474 rs->sc_flags |= RAIDF_WANTED;
2475 if ((error =
2476 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2477 return (error);
2478 }
2479 rs->sc_flags |= RAIDF_LOCKED;
2480 return (0);
2481 }
2482 /*
2483 * Unlock and wake up any waiters.
2484 */
2485 static void
2486 raidunlock(struct raid_softc *rs)
2487 {
2488
2489 rs->sc_flags &= ~RAIDF_LOCKED;
2490 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2491 rs->sc_flags &= ~RAIDF_WANTED;
2492 wakeup(rs);
2493 }
2494 }
2495
2496
2497 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2498 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2499 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2500
/*
 * Byte offset of the component info (label) area from the start of a
 * component.  A function rather than a bare constant so that all
 * users share a single source of truth.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2507
2508 static daddr_t
2509 rf_component_info_size(unsigned secsize)
2510 {
2511 daddr_t info_size;
2512
2513 KASSERT(secsize);
2514 if (secsize > RF_COMPONENT_INFO_SIZE)
2515 info_size = secsize;
2516 else
2517 info_size = RF_COMPONENT_INFO_SIZE;
2518
2519 return info_size;
2520 }
2521
2522 static daddr_t
2523 rf_parity_map_offset(RF_Raid_t *raidPtr)
2524 {
2525 daddr_t map_offset;
2526
2527 KASSERT(raidPtr->bytesPerSector);
2528 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2529 map_offset = raidPtr->bytesPerSector;
2530 else
2531 map_offset = RF_COMPONENT_INFO_SIZE;
2532 map_offset += rf_component_info_offset();
2533
2534 return map_offset;
2535 }
2536
2537 static daddr_t
2538 rf_parity_map_size(RF_Raid_t *raidPtr)
2539 {
2540 daddr_t map_size;
2541
2542 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2543 map_size = raidPtr->bytesPerSector;
2544 else
2545 map_size = RF_PARITY_MAP_SIZE;
2546
2547 return map_size;
2548 }
2549
/*
 * Set the "clean" bit in column `col's in-core component label and
 * flush the label to disk.  Always returns 0.
 * NOTE(review): write errors from raidflush_component_label() are
 * silently ignored here.
 */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2560
2561
/*
 * Set the "dirty" bit in column `col's in-core component label and
 * flush the label to disk.  Always returns 0.
 * NOTE(review): write errors from raidflush_component_label() are
 * silently ignored here (mirrors raidmarkclean()).
 */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2572
/*
 * Re-read column `col's component label from disk into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns 0 or the error from
 * raidread_component_label().
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2582
/*
 * Return a pointer to the in-core component label for column `col'.
 * No I/O is performed; use raidfetch_component_label() to refresh the
 * copy from disk.
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2588
/*
 * Write the in-core component label for column `col' back to disk,
 * stamping it with the set's current mod_counter first.  Returns 0 or
 * the error from raidwrite_component_label().
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's notion of the mod counter in sync */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2603
2604
/*
 * Read a component label from `dev'/`b_vp' into *clabel.  The label
 * lives in the component info area, whose on-disk size depends on the
 * sector size of the underlying device.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2614
2615 /* ARGSUSED */
2616 static int
2617 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2618 size_t msize, daddr_t offset, daddr_t dsize)
2619 {
2620 struct buf *bp;
2621 const struct bdevsw *bdev;
2622 int error;
2623
2624 /* XXX should probably ensure that we don't try to do this if
2625 someone has changed rf_protected_sectors. */
2626
2627 if (b_vp == NULL) {
2628 /* For whatever reason, this component is not valid.
2629 Don't try to read a component label from it. */
2630 return(EINVAL);
2631 }
2632
2633 /* get a block of the appropriate size... */
2634 bp = geteblk((int)dsize);
2635 bp->b_dev = dev;
2636
2637 /* get our ducks in a row for the read */
2638 bp->b_blkno = offset / DEV_BSIZE;
2639 bp->b_bcount = dsize;
2640 bp->b_flags |= B_READ;
2641 bp->b_resid = dsize;
2642
2643 bdev = bdevsw_lookup(bp->b_dev);
2644 if (bdev == NULL)
2645 return (ENXIO);
2646 (*bdev->d_strategy)(bp);
2647
2648 error = biowait(bp);
2649
2650 if (!error) {
2651 memcpy(data, bp->b_data, msize);
2652 }
2653
2654 brelse(bp, 0);
2655 return(error);
2656 }
2657
2658
/*
 * Write *clabel to the component info area of `dev'/`b_vp'.
 * Synchronous (asyncp is 0); returns 0 or an errno.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2668
2669 /* ARGSUSED */
2670 static int
2671 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2672 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2673 {
2674 struct buf *bp;
2675 const struct bdevsw *bdev;
2676 int error;
2677
2678 /* get a block of the appropriate size... */
2679 bp = geteblk((int)dsize);
2680 bp->b_dev = dev;
2681
2682 /* get our ducks in a row for the write */
2683 bp->b_blkno = offset / DEV_BSIZE;
2684 bp->b_bcount = dsize;
2685 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2686 bp->b_resid = dsize;
2687
2688 memset(bp->b_data, 0, dsize);
2689 memcpy(bp->b_data, data, msize);
2690
2691 bdev = bdevsw_lookup(bp->b_dev);
2692 if (bdev == NULL)
2693 return (ENXIO);
2694 (*bdev->d_strategy)(bp);
2695 if (asyncp)
2696 return 0;
2697 error = biowait(bp);
2698 brelse(bp, 0);
2699 if (error) {
2700 #if 1
2701 printf("Failed to write RAID component info!\n");
2702 #endif
2703 }
2704
2705 return(error);
2706 }
2707
/*
 * Write the in-core parity map `map' to the parity map area of every
 * live component.  Dead disks are skipped; write errors are ignored
 * (see the XXXjld note below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2725
/*
 * Read the parity map from every live component into *map.  The first
 * readable copy seeds the result; subsequent copies are combined via
 * rf_paritymap_merge() (presumably a union of dirty regions -- confirm
 * in the parity map code).  Read errors are ignored.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2750
/*
 * Bump the mod counter and mark the component labels of all live
 * components (and all in-use spares) dirty on disk.  Spared
 * components are left untouched.  A later configuration that finds
 * the dirty bit set can tell the set was not shut down cleanly.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2810
2811
/*
 * Refresh the on-disk component labels of all optimal components and
 * all fully in-use spares: bump mod_counter, record status and the
 * unit we're configured as, and flush each label.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the clean bit
 * is set as well.  Failed components are not touched.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2886
2887 void
2888 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2889 {
2890
2891 if (vp != NULL) {
2892 if (auto_configured == 1) {
2893 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2894 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2895 vput(vp);
2896
2897 } else {
2898 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2899 }
2900 }
2901 }
2902
2903
2904 void
2905 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2906 {
2907 int r,c;
2908 struct vnode *vp;
2909 int acd;
2910
2911
2912 /* We take this opportunity to close the vnodes like we should.. */
2913
2914 for (c = 0; c < raidPtr->numCol; c++) {
2915 vp = raidPtr->raid_cinfo[c].ci_vp;
2916 acd = raidPtr->Disks[c].auto_configured;
2917 rf_close_component(raidPtr, vp, acd);
2918 raidPtr->raid_cinfo[c].ci_vp = NULL;
2919 raidPtr->Disks[c].auto_configured = 0;
2920 }
2921
2922 for (r = 0; r < raidPtr->numSpare; r++) {
2923 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2924 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2925 rf_close_component(raidPtr, vp, acd);
2926 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2927 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2928 }
2929 }
2930
2931
/*
 * Kernel-thread body: fail the component in req->col and, if
 * RF_FDFLAGS_RECON is set, reconstruct its contents to a spare.
 * Frees `req' when done; runs at splbio() and exits via
 * kthread_exit() -- never returns.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* third argument non-zero requests reconstruction to a spare */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2953
/*
 * Kernel-thread body: rewrite all parity on the set.  On success the
 * in-core parity_good flag is set (the per-component clean bits are
 * written out at shutdown).  Wakes anyone waiting on
 * parity_rewrite_in_progress during shutdown; never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2984
2985
/*
 * Kernel-thread body: run rf_CopybackReconstructedData() for the set,
 * maintaining the copyback_in_progress flag around it.  Runs at
 * splbio(); never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3000
3001
/*
 * Kernel-thread body: reconstruct the contents of column req->col in
 * place.  Frees `req'; runs at splbio() and never returns.
 * NOTE(review): the return value of rf_ReconstructInPlace() is
 * discarded here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3019
/*
 * Read the component label from `dev' (open vnode `vp', name `cname')
 * and, if it looks like a sane RAIDframe component, prepend a new
 * RF_AutoConfig_t for it to `ac_list' and return the new list head;
 * ownership of `vp' then passes to the list entry.  If the label is
 * not reasonable the vnode is closed and released and the list is
 * returned unchanged.  On memory exhaustion the whole list is freed
 * and NULL is returned.
 * NOTE(review): on that out-of-memory path `vp' is neither closed nor
 * released -- looks like a vnode leak; confirm whether intended.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		    while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3077
/*
 * Walk every disk-class device in the system and collect RAIDframe
 * component labels into an RF_AutoConfig_t list for autoconfiguration.
 *
 * Wedges (dk) are probed whole if their type is DKW_PTYPE_RAIDFRAME.
 * For other disks, each disklabel partition marked FS_RAID is probed;
 * if none is found, the raw partition itself is tried, since raid
 * components may be configured on bare disk devices.  Floppies, CDs
 * and md(4) devices are skipped.  Returns the (possibly empty) list.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* whole-wedge probe: type must be raidframe */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp on success */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3259
3260
/*
 * Decide whether an on-disk component label is plausible: a known
 * label version, a legal clean flag, row/column within the stated
 * geometry, and non-zero block size and block count.  As a side
 * effect, a plausible label has known garbage scrubbed from its *Hi
 * fields via rf_fix_old_label_size() (only when `numsecs' is
 * non-zero).  Returns 1 if usable, 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned. If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3292
3293
3294 /*
3295 * For reasons yet unknown, some old component labels have garbage in
3296 * the newer numBlocksHi region, and this causes lossage. Since those
3297 * disks will also have numsecs set to less than 32 bits of sectors,
3298 * we can determine when this corruption has occurred, and fix it.
3299 *
3300 * The exact same problem, with the same unknown reason, happens to
3301 * the partitionSizeHi member as well.
3302 */
3303 static void
3304 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3305 {
3306
3307 if (numsecs < ((uint64_t)1 << 32)) {
3308 if (clabel->numBlocksHi) {
3309 printf("WARNING: total sectors < 32 bits, yet "
3310 "numBlocksHi set\n"
3311 "WARNING: resetting numBlocksHi to zero.\n");
3312 clabel->numBlocksHi = 0;
3313 }
3314
3315 if (clabel->partitionSizeHi) {
3316 printf("WARNING: total sectors < 32 bits, yet "
3317 "partitionSizeHi set\n"
3318 "WARNING: resetting partitionSizeHi to zero.\n");
3319 clabel->partitionSizeHi = 0;
3320 }
3321 }
3322 }
3323
3324
3325 #ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.  Compiled only under DEBUG.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
3354 #endif
3355
3356 RF_ConfigSet_t *
3357 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3358 {
3359 RF_AutoConfig_t *ac;
3360 RF_ConfigSet_t *config_sets;
3361 RF_ConfigSet_t *cset;
3362 RF_AutoConfig_t *ac_next;
3363
3364
3365 config_sets = NULL;
3366
3367 /* Go through the AutoConfig list, and figure out which components
3368 belong to what sets. */
3369 ac = ac_list;
3370 while(ac!=NULL) {
3371 /* we're going to putz with ac->next, so save it here
3372 for use at the end of the loop */
3373 ac_next = ac->next;
3374
3375 if (config_sets == NULL) {
3376 /* will need at least this one... */
3377 config_sets = (RF_ConfigSet_t *)
3378 malloc(sizeof(RF_ConfigSet_t),
3379 M_RAIDFRAME, M_NOWAIT);
3380 if (config_sets == NULL) {
3381 panic("rf_create_auto_sets: No memory!");
3382 }
3383 /* this one is easy :) */
3384 config_sets->ac = ac;
3385 config_sets->next = NULL;
3386 config_sets->rootable = 0;
3387 ac->next = NULL;
3388 } else {
3389 /* which set does this component fit into? */
3390 cset = config_sets;
3391 while(cset!=NULL) {
3392 if (rf_does_it_fit(cset, ac)) {
3393 /* looks like it matches... */
3394 ac->next = cset->ac;
3395 cset->ac = ac;
3396 break;
3397 }
3398 cset = cset->next;
3399 }
3400 if (cset==NULL) {
3401 /* didn't find a match above... new set..*/
3402 cset = (RF_ConfigSet_t *)
3403 malloc(sizeof(RF_ConfigSet_t),
3404 M_RAIDFRAME, M_NOWAIT);
3405 if (cset == NULL) {
3406 panic("rf_create_auto_sets: No memory!");
3407 }
3408 cset->ac = ac;
3409 ac->next = NULL;
3410 cset->next = config_sets;
3411 cset->rootable = 0;
3412 config_sets = cset;
3413 }
3414 }
3415 ac = ac_next;
3416 }
3417
3418
3419 return(config_sets);
3420 }
3421
3422 static int
3423 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3424 {
3425 RF_ComponentLabel_t *clabel1, *clabel2;
3426
3427 /* If this one matches the *first* one in the set, that's good
3428 enough, since the other members of the set would have been
3429 through here too... */
3430 /* note that we are not checking partitionSize here..
3431
3432 Note that we are also not checking the mod_counters here.
3433 If everything else matches except the mod_counter, that's
3434 good enough for this test. We will deal with the mod_counters
3435 a little later in the autoconfiguration process.
3436
3437 (clabel1->mod_counter == clabel2->mod_counter) &&
3438
3439 The reason we don't check for this is that failed disks
3440 will have lower modification counts. If those disks are
3441 not added to the set they used to belong to, then they will
3442 form their own set, which may result in 2 different sets,
3443 for example, competing to be configured at raid0, and
3444 perhaps competing to be the root filesystem set. If the
3445 wrong ones get configured, or both attempt to become /,
3446 weird behaviour and or serious lossage will occur. Thus we
3447 need to bring them into the fold here, and kick them out at
3448 a later point.
3449
3450 */
3451
3452 clabel1 = cset->ac->clabel;
3453 clabel2 = ac->clabel;
3454 if ((clabel1->version == clabel2->version) &&
3455 (clabel1->serial_number == clabel2->serial_number) &&
3456 (clabel1->num_rows == clabel2->num_rows) &&
3457 (clabel1->num_columns == clabel2->num_columns) &&
3458 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3459 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3460 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3461 (clabel1->parityConfig == clabel2->parityConfig) &&
3462 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3463 (clabel1->blockSize == clabel2->blockSize) &&
3464 rf_component_label_numblocks(clabel1) ==
3465 rf_component_label_numblocks(clabel2) &&
3466 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3467 (clabel1->root_partition == clabel2->root_partition) &&
3468 (clabel1->last_unit == clabel2->last_unit) &&
3469 (clabel1->config_order == clabel2->config_order)) {
3470 /* if it get's here, it almost *has* to be a match */
3471 } else {
3472 /* it's not consistent with somebody in the set..
3473 punt */
3474 return(0);
3475 }
3476 /* all was fine.. it must fit... */
3477 return(1);
3478 }
3479
/*
 * Decide whether this config set has enough live components (at the
 * winning mod_counter) to be configured.  Returns 1 if configurable,
 * 0 if too many components are missing for the parity level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest mod_counter seen wins: stale (failed) components
	   carry lower counts and are skipped in the per-column scan
	   below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current component occupying column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished an even/odd mirror pair without
			   bailing.. reset the even_pair_failed flag,
			   and go on to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerated failures: RAID 0 none, RAID 4/5 at most one.
	   (RAID 1 was fully handled in the loop above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3582
3583 void
3584 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3585 RF_Raid_t *raidPtr)
3586 {
3587 RF_ComponentLabel_t *clabel;
3588 int i;
3589
3590 clabel = ac->clabel;
3591
3592 /* 1. Fill in the common stuff */
3593 config->numRow = clabel->num_rows = 1;
3594 config->numCol = clabel->num_columns;
3595 config->numSpare = 0; /* XXX should this be set here? */
3596 config->sectPerSU = clabel->sectPerSU;
3597 config->SUsPerPU = clabel->SUsPerPU;
3598 config->SUsPerRU = clabel->SUsPerRU;
3599 config->parityConfig = clabel->parityConfig;
3600 /* XXX... */
3601 strcpy(config->diskQueueType,"fifo");
3602 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3603 config->layoutSpecificSize = 0; /* XXX ?? */
3604
3605 while(ac!=NULL) {
3606 /* row/col values will be in range due to the checks
3607 in reasonable_label() */
3608 strcpy(config->devnames[0][ac->clabel->column],
3609 ac->devname);
3610 ac = ac->next;
3611 }
3612
3613 for(i=0;i<RF_MAXDBGV;i++) {
3614 config->debugVars[i][0] = 0;
3615 }
3616 }
3617
3618 int
3619 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3620 {
3621 RF_ComponentLabel_t *clabel;
3622 int column;
3623 int sparecol;
3624
3625 raidPtr->autoconfigure = new_value;
3626
3627 for(column=0; column<raidPtr->numCol; column++) {
3628 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3629 clabel = raidget_component_label(raidPtr, column);
3630 clabel->autoconfigure = new_value;
3631 raidflush_component_label(raidPtr, column);
3632 }
3633 }
3634 for(column = 0; column < raidPtr->numSpare ; column++) {
3635 sparecol = raidPtr->numCol + column;
3636 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3637 clabel = raidget_component_label(raidPtr, sparecol);
3638 clabel->autoconfigure = new_value;
3639 raidflush_component_label(raidPtr, sparecol);
3640 }
3641 }
3642 return(new_value);
3643 }
3644
3645 int
3646 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3647 {
3648 RF_ComponentLabel_t *clabel;
3649 int column;
3650 int sparecol;
3651
3652 raidPtr->root_partition = new_value;
3653 for(column=0; column<raidPtr->numCol; column++) {
3654 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3655 clabel = raidget_component_label(raidPtr, column);
3656 clabel->root_partition = new_value;
3657 raidflush_component_label(raidPtr, column);
3658 }
3659 }
3660 for(column = 0; column < raidPtr->numSpare ; column++) {
3661 sparecol = raidPtr->numCol + column;
3662 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3663 clabel = raidget_component_label(raidPtr, sparecol);
3664 clabel->root_partition = new_value;
3665 raidflush_component_label(raidPtr, sparecol);
3666 }
3667 }
3668 return(new_value);
3669 }
3670
3671 void
3672 rf_release_all_vps(RF_ConfigSet_t *cset)
3673 {
3674 RF_AutoConfig_t *ac;
3675
3676 ac = cset->ac;
3677 while(ac!=NULL) {
3678 /* Close the vp, and give it back */
3679 if (ac->vp) {
3680 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3681 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3682 vput(ac->vp);
3683 ac->vp = NULL;
3684 }
3685 ac = ac->next;
3686 }
3687 }
3688
3689
3690 void
3691 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3692 {
3693 RF_AutoConfig_t *ac;
3694 RF_AutoConfig_t *next_ac;
3695
3696 ac = cset->ac;
3697 while(ac!=NULL) {
3698 next_ac = ac->next;
3699 /* nuke the label */
3700 free(ac->clabel, M_RAIDFRAME);
3701 /* cleanup the config structure */
3702 free(ac, M_RAIDFRAME);
3703 /* "next.." */
3704 ac = next_ac;
3705 }
3706 /* and, finally, nuke the config set */
3707 free(cset, M_RAIDFRAME);
3708 }
3709
3710
/*
 * Populate a component label from the current state of the raid set,
 * ready to be written out to the components.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout geometry */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* stores the size split across the lo/hi label fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3743
/*
 * Autoconfigure one config set: pick a raid unit, build an RF_Config_t
 * from the component labels, and bring the set up.  Returns the softc
 * of the configured unit, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk upward from the preferred unit until a free one is found */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtr->root_partition = 1;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3809
3810 void
3811 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3812 {
3813 struct buf *bp;
3814 struct raid_softc *rs;
3815
3816 bp = (struct buf *)desc->bp;
3817 rs = desc->raidPtr->softc;
3818 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3819 (bp->b_flags & B_READ));
3820 }
3821
/*
 * Convenience wrapper: initialize a pool for RAIDframe use and
 * pre-populate it.  xmin items are primed and kept as the low-water
 * mark; xmax is the high-water mark for cached idle items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* pre-allocate xmin items (pool_prime's return value is ignored) */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3831
3832 /*
3833 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3834 * if there is IO pending and if that IO could possibly be done for a
3835 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3836 * otherwise.
3837 *
3838 */
3839
3840 int
3841 rf_buf_queue_check(RF_Raid_t *raidPtr)
3842 {
3843 struct raid_softc *rs = raidPtr->softc;
3844 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3845 /* there is work to do */
3846 return 0;
3847 }
3848 /* default is nothing to do */
3849 return 1;
3850 }
3851
3852 int
3853 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3854 {
3855 uint64_t numsecs;
3856 unsigned secsize;
3857 int error;
3858
3859 error = getdisksize(vp, &numsecs, &secsize);
3860 if (error == 0) {
3861 diskPtr->blockSize = secsize;
3862 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3863 diskPtr->partitionSize = numsecs;
3864 return 0;
3865 }
3866 return error;
3867 }
3868
/*
 * Autoconf match function: always matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3874
/*
 * Autoconf attach function: nothing to do here; the real setup
 * happens when a set is configured (see rf_auto_config_set()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3880
3881
3882 static int
3883 raid_detach(device_t self, int flags)
3884 {
3885 int error;
3886 struct raid_softc *rs = raidget(device_unit(self));
3887
3888 if (rs == NULL)
3889 return ENXIO;
3890
3891 if ((error = raidlock(rs)) != 0)
3892 return (error);
3893
3894 error = raid_detach_unlocked(rs);
3895
3896 raidunlock(rs);
3897
3898 /* XXXkd: raidput(rs) ??? */
3899
3900 return error;
3901 }
3902
/*
 * Publish a synthetic disk geometry for the raid unit, derived from
 * the RAID set's parameters.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* NOTE(review): 4 tracks per column looks like an arbitrary
	   fabricated value, not physical geometry */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3917
3918 /*
3919 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3920 * We end up returning whatever error was returned by the first cache flush
3921 * that fails.
3922 */
3923
3924 int
3925 rf_sync_component_caches(RF_Raid_t *raidPtr)
3926 {
3927 int c, sparecol;
3928 int e,error;
3929 int force = 1;
3930
3931 error = 0;
3932 for (c = 0; c < raidPtr->numCol; c++) {
3933 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3934 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3935 &force, FWRITE, NOCRED);
3936 if (e) {
3937 if (e != ENODEV)
3938 printf("raid%d: cache flush to component %s failed.\n",
3939 raidPtr->raidid, raidPtr->Disks[c].devname);
3940 if (error == 0) {
3941 error = e;
3942 }
3943 }
3944 }
3945 }
3946
3947 for( c = 0; c < raidPtr->numSpare ; c++) {
3948 sparecol = raidPtr->numCol + c;
3949 /* Need to ensure that the reconstruct actually completed! */
3950 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3951 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3952 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3953 if (e) {
3954 if (e != ENODEV)
3955 printf("raid%d: cache flush to component %s failed.\n",
3956 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3957 if (error == 0) {
3958 error = e;
3959 }
3960 }
3961 }
3962 }
3963 return error;
3964 }
3965