rf_netbsdkintf.c revision 1.311 1 /* $NetBSD: rf_netbsdkintf.c,v 1.311 2014/07/25 08:02:20 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.311 2014/07/25 08:02:20 dholland Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
207 const struct bdevsw raid_bdevsw = {
208 .d_open = raidopen,
209 .d_close = raidclose,
210 .d_strategy = raidstrategy,
211 .d_ioctl = raidioctl,
212 .d_dump = raiddump,
213 .d_psize = raidsize,
214 .d_discard = nodiscard,
215 .d_flag = D_DISK
216 };
217
218 const struct cdevsw raid_cdevsw = {
219 .d_open = raidopen,
220 .d_close = raidclose,
221 .d_read = raidread,
222 .d_write = raidwrite,
223 .d_ioctl = raidioctl,
224 .d_stop = nostop,
225 .d_tty = notty,
226 .d_poll = nopoll,
227 .d_mmap = nommap,
228 .d_kqfilter = nokqfilter,
229 .d_flag = D_DISK
230 };
231
232 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
233
/*
 * Per-unit software state for a RAIDframe device.  One of these exists
 * for every raid unit ever opened; they live on the global "raids" list
 * (protected by raid_lock) and are created lazily by raidget().
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raid unit number (raidN) */
	int     sc_flags;		/* flags (RAIDF_*, below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* the RAIDframe descriptor proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global raids list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
253
254 #define raidunit(x) DISKUNIT(x)
255
256 extern struct cfdriver raid_cd;
257 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
258 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
259 DVF_DETACH_SHUTDOWN);
260
261 /*
262 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
263 * Be aware that large numbers can allow the driver to consume a lot of
264 * kernel memory, especially on writes, and in degraded mode reads.
265 *
266 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
267 * a single 64K write will typically require 64K for the old data,
268 * 64K for the old parity, and 64K for the new parity, for a total
269 * of 192K (if the parity buffer is not re-used immediately).
270 * Even it if is used immediately, that's still 128K, which when multiplied
271 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
272 *
273 * Now in degraded mode, for example, a 64K read on the above setup may
274 * require data reconstruction, which will require *all* of the 4 remaining
275 * disks to participate -- 4 * 32K/disk == 128K again.
276 */
277
278 #ifndef RAIDOUTSTANDING
279 #define RAIDOUTSTANDING 6
280 #endif
281
282 #define RAIDLABELDEV(dev) \
283 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
284
285 /* declared here, and made public, for the benefit of KVM stuff.. */
286
287 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
288 struct disklabel *);
289 static void raidgetdisklabel(dev_t);
290 static void raidmakedisklabel(struct raid_softc *);
291
292 static int raidlock(struct raid_softc *);
293 static void raidunlock(struct raid_softc *);
294
295 static int raid_detach_unlocked(struct raid_softc *);
296
297 static void rf_markalldirty(RF_Raid_t *);
298 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
299
300 void rf_ReconThread(struct rf_recon_req *);
301 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
302 void rf_CopybackThread(RF_Raid_t *raidPtr);
303 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
304 int rf_autoconfig(device_t);
305 void rf_buildroothack(RF_ConfigSet_t *);
306
307 RF_AutoConfig_t *rf_find_raid_components(void);
308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
310 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
312 int rf_set_autoconfig(RF_Raid_t *, int);
313 int rf_set_rootpartition(RF_Raid_t *, int);
314 void rf_release_all_vps(RF_ConfigSet_t *);
315 void rf_cleanup_config_set(RF_ConfigSet_t *);
316 int rf_have_enough_components(RF_ConfigSet_t *);
317 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
318 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
319
320 /*
321 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
322 * Note that this is overridden by having RAID_AUTOCONFIG as an option
323 * in the kernel config file.
324 */
325 #ifdef RAID_AUTOCONFIG
326 int raidautoconfig = 1;
327 #else
328 int raidautoconfig = 0;
329 #endif
330 static bool raidautoconfigdone = false;
331
332 struct RF_Pools_s rf_pools;
333
334 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
335 static kmutex_t raid_lock;
336
337 static struct raid_softc *
338 raidcreate(int unit) {
339 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
340 if (sc == NULL) {
341 #ifdef DIAGNOSTIC
342 printf("%s: out of memory\n", __func__);
343 #endif
344 return NULL;
345 }
346 sc->sc_unit = unit;
347 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
348 return sc;
349 }
350
351 static void
352 raiddestroy(struct raid_softc *sc) {
353 bufq_free(sc->buf_queue);
354 kmem_free(sc, sizeof(*sc));
355 }
356
357 static struct raid_softc *
358 raidget(int unit) {
359 struct raid_softc *sc;
360 if (unit < 0) {
361 #ifdef DIAGNOSTIC
362 panic("%s: unit %d!", __func__, unit);
363 #endif
364 return NULL;
365 }
366 mutex_enter(&raid_lock);
367 LIST_FOREACH(sc, &raids, sc_link) {
368 if (sc->sc_unit == unit) {
369 mutex_exit(&raid_lock);
370 return sc;
371 }
372 }
373 mutex_exit(&raid_lock);
374 if ((sc = raidcreate(unit)) == NULL)
375 return NULL;
376 mutex_enter(&raid_lock);
377 LIST_INSERT_HEAD(&raids, sc, sc_link);
378 mutex_exit(&raid_lock);
379 return sc;
380 }
381
382 static void
383 raidput(struct raid_softc *sc) {
384 mutex_enter(&raid_lock);
385 LIST_REMOVE(sc, sc_link);
386 mutex_exit(&raid_lock);
387 raiddestroy(sc);
388 }
389
/*
 * Driver attach hook, called once from autoconf during boot.  "num" is
 * the unit count from the kernel config; it is unused here because
 * softcs are created lazily by raidget().  Initializes global locks,
 * boots the RAIDframe core, attaches the cfattach, and registers a
 * config finalizer so autoconfiguration runs only after all real
 * hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table installation handshake state (declustering only). */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
422
/*
 * Config finalizer: scan all disks for RAID components, group them
 * into sets, and configure the valid ones.  Runs at most once
 * (guarded by raidautoconfigdone) and only if autoconfiguration is
 * enabled.  Returns 1 if a scan was performed, 0 if skipped — the
 * finalize protocol re-runs finalizers that return nonzero.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
460
/*
 * Return 1 if the RAID set "r" contains the boot device "bdv" as one
 * of its components, else 0.  Component names are stored as
 * "/dev/xxNp"; the "/dev/" prefix is skipped before comparing.  For
 * wedge components ("dkN"), the wedge's parent device name is used
 * instead so it can be matched against the booted disk.
 *
 * NOTE(review): the match is a prefix compare of length
 * strlen(bootname), intended to ignore the trailing partition letter
 * (e.g. "wd0" matches component "wd0e").  It would presumably also
 * false-match "wd1" against a component on "wd10" — TODO confirm
 * whether that can occur in practice.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;	/* skip "/dev/" prefix */
		if (strncmp(devname, "dk", 2) == 0) {
			/* Wedge component: compare against its parent disk. */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
484
/*
 * Walk the list of auto-detected config sets, configure every set that
 * is complete and marked for autoconfiguration, and then decide
 * whether one of the configured sets should become the root device.
 * Consumes config_sets: every set is either configured or has its
 * resources released, and is then cleaned up.
 *
 * Root selection: if exactly one configured set is rootable it wins
 * (possibly overriding booted_device); if several are, prefer the one
 * that physically contains the device we booted from; if that is still
 * ambiguous, fall back to asking the user (RB_ASKNAME).  A user-
 * specified root (rootspec) always takes precedence and disables all
 * of this.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* cset is destroyed below; remember its successor now. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* Set has wedges: root must be the wedge named
			   "raidNa", not the raw raid device. */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* Override root when nothing else was found, when the
		   set is forced-root, or when it contains the boot disk. */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Disambiguate: count rootable sets that contain the
		   actual boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
595
596
/*
 * d_psize entry point: return the size, in DEV_BSIZE units, of the
 * swap partition on "dev", or -1 if the unit is not configured or the
 * partition is not FS_SWAP.  If the partition is not currently open it
 * is transiently opened and closed around the label lookup, matching
 * the standard disk-driver pattern.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	/* Remember whether this partition was already open. */
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	/* Only swap partitions can be dumped/swapped to. */
	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	/* Undo the transient open, if we did one. */
	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}
629
/*
 * d_dump entry point: write a crash dump directly to one live
 * component of the RAID set.  Only RAID 1 sets (one data column, one
 * parity column) are supported, since only there does a single
 * component hold a complete copy of the data.  The dump goes to the
 * first optimal component found, falling back to a used spare; a
 * spared master is preferred over a previously chosen slave.  The
 * unit is locked for the duration.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dumps must be in whole DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run past the end of the device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	/* Hand the dump off to the component's own d_dump routine. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * d_open entry point for both the block and character devices.
 * Validates the requested partition against the disklabel (re-reading
 * the label on first open), records the open in the appropriate open
 * mask, and marks all components dirty on the very first open of a
 * configured set.  Fails with EBUSY if the unit is shutting down or if
 * a non-raw partition is opened while wedges exist.  The unit lock is
 * held for the whole operation; all failures exit through "bad".
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * d_close entry point.  Clears the partition's bit from the relevant
 * open mask; on the last close of a still-configured unit the
 * component labels are written out with a final "clean" update.  The
 * set itself is not unconfigured here — that happens via ioctl or
 * detach.  Always returns 0 once the unit lock is obtained.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
899
/*
 * d_strategy entry point: validate and enqueue a buffer for the raid
 * device.  After bounds-checking (against the media size for RAW_PART,
 * against the disklabel otherwise), the buffer is placed on the unit's
 * queue under iodone_lock and the I/O thread is woken via iodone_cv to
 * service it asynchronously.  On any validation failure the buffer is
 * completed immediately with b_resid == b_bcount.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors to DEV_BSIZE units, shifting in
		   whichever direction the sector size requires. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
970 /* ARGSUSED */
971 int
972 raidread(dev_t dev, struct uio *uio, int flags)
973 {
974 int unit = raidunit(dev);
975 struct raid_softc *rs;
976
977 if ((rs = raidget(unit)) == NULL)
978 return ENXIO;
979
980 if ((rs->sc_flags & RAIDF_INITED) == 0)
981 return (ENXIO);
982
983 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
984
985 }
986 /* ARGSUSED */
987 int
988 raidwrite(dev_t dev, struct uio *uio, int flags)
989 {
990 int unit = raidunit(dev);
991 struct raid_softc *rs;
992
993 if ((rs = raidget(unit)) == NULL)
994 return ENXIO;
995
996 if ((rs->sc_flags & RAIDF_INITED) == 0)
997 return (ENXIO);
998
999 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1000
1001 }
1002
/*
 * Detach helper, called with the unit lock already held.  Refuses with
 * EBUSY while any partition remains open.  For a configured unit the
 * RAIDframe core is shut down first (propagating any error); then the
 * wedges and the generic disk are torn down.  Returns 0 on success.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1034
/*
 * raidioctl: ioctl entry point for the RAIDframe pseudo-disk (cdevsw/bdevsw
 * d_ioctl).  Dispatches both the RAIDFRAME_* management ioctls and the
 * standard disk ioctls (disklabel, wedges, bufq strategy).  Commands that
 * modify state require FWRITE; most commands require the set to be
 * configured (RAIDF_INITED).  Returns 0 or an errno.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* rf_config50() leaves a kernel copy in k_cfg; join the
		 * common configuration path below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
	/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* Common tail for RAIDFRAME_CONFIGURE and (COMPAT_50)
		 * RAIDFRAME_CONFIGURE50: k_cfg is a kernel-space copy of
		 * the user's RF_Config_t and is owned (and freed) here. */
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		/* Re-point layoutSpecific at the kernel copy; the original
		 * value was a user-space pointer and must not be used. */
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

	/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse if any partition other than ours is open, or if
		 * ours is open both block and character. */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* config_detach() ends up tearing down the unit (see the
		 * detach path above); only free the cfdata on success. */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		/* The user's label is copied in only to learn which column
		 * is wanted; the scratch copy is freed before answering
		 * from the in-core label. */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite proceeds asynchronously in a kernel thread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; returns the initial retcode (0). */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the target component's state under the
		 * array mutex before spawning the rebuild thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* rrcopy is handed off to (and released by) the recon
		 * thread. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] immediately after the data
		 * columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

	/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): returns 0 rather than retcode, unlike the
		 * other thread-spawning cases above -- a thread-creation
		 * failure (and the resulting rrcopy leak) is not reported
		 * to the caller; verify whether this is intentional. */
		return (0);

	/* invoke a copyback operation after recon on whatever disk
	 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

	/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	/* the sparetable daemon calls this to wait for the kernel to
	 * need a spare table. this ioctl does not return until a
	 * spare table is needed. XXX -- calling mpsleep here in the
	 * ioctl code is almost certainly wrong and evil. -- XXX XXX
	 * -- I should either compute the spare table in the kernel,
	 * or have a different -- XXX XXX -- interface (a different
	 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): rf_broadcast_conf2 looks like a typo for
		 * rf_broadcast_cond2 (used in the SEND_SPARET case below
		 * and in rf_GetSpareTableFromDaemon); harmless only while
		 * this region stays #if 0'd out. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

	/* used by the spare table daemon to deliver a spare table
	 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* The W variants additionally write the label to
			 * disk; the S variants only set the in-core copy. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	{
		struct disk_strategy *dks = (void *)data;

		/* splbio protects buf_queue against the I/O path. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	}

	case DIOCSSTRATEGY:
	{
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap queues at splbio so pending bufs migrate atomically
		 * with respect to the strategy/done paths. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	}

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1966
1967
1968 /* raidinit -- complete the rest of the initialization for the
1969 RAIDframe device. */
1970
1971
/*
 * raidinit: complete device-level initialization after a successful
 * rf_Configure().  Attaches a pseudo-device instance for the unit,
 * initializes and attaches its disk(9) structure, sets the geometry
 * and kicks off wedge discovery.  On pseudo-device attach failure the
 * RAIDF_INITED flag is cleared again and the function returns without
 * attaching the disk.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* sc_xname is bounded by snprintf; raid unit numbers are small. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* roll back: no device means the unit is not usable */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* scan the new disk for GPT/disklabel wedges */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
2023 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2024 /* wake up the daemon & tell it to get us a spare table
2025 * XXX
2026 * the entries in the queues should be tagged with the raidPtr
2027 * so that in the extremely rare case that two recons happen at once,
2028 * we know for which device were requesting a spare table
2029 * XXX
2030 *
2031 * XXX This code is not currently used. GO
2032 */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * user-space sparetable daemon and block until it responds.
 *
 * The request is queued on rf_sparet_wait_queue (where the daemon's
 * RAIDFRAME_SPARET_WAIT ioctl picks it up) and the caller then sleeps
 * on rf_sparet_resp_cv until a response appears on
 * rf_sparet_resp_queue.  Returns the "fcol" status field from the
 * response.  Note that the response entry dequeued here is a different
 * allocation from the request we enqueued; it is freed here.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2 drops the mutex while asleep and re-takes it
	 * before returning */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2056 #endif
2057
2058 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2059 * bp & passes it down.
2060 * any calls originating in the kernel must use non-blocking I/O
2061 * do some extra sanity checking to return "appropriate" error values for
2062 * certain conditions (to make some standard utilities work)
2063 *
2064 * Formerly known as: rf_DoAccessKernel
2065 */
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 *
 * Drains the unit's buf queue while openings remain, translating each
 * buf's partition-relative block number to a RAID address and handing
 * it to rf_DoAccess() as non-blocking async I/O.  Bufs that fail
 * validation are completed immediately with biodone().
 *
 * Locking: raidPtr->mutex is held on entry to each loop iteration and
 * released while the buf is being processed; every path back to the
 * loop top (continue or fallthrough) re-acquires it first.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex around the label update, which does its
		 * own locking */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject accesses past the end of the array; the extra
		 * (sum < ...) comparisons catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a multiple of the sector
		 * size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* fail the buf here; the opening taken above is
			 * presumably returned on the I/O completion path --
			 * TODO confirm */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2183
2184
2185
2186
/*
 * rf_DispatchKernelIO: invoke an I/O from kernel mode.
 *
 * Dispatch the request described by req to the underlying component
 * device, or complete a NOP request immediately.  The disk queue
 * mutex should be held upon entry; it is dropped around the call to
 * bdev_strategy() (which can block) and retaken before returning.
 *
 * Always returns 0.
 */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP straight through the normal I/O
		 * completion path; KernelWakeupFunc() queues it on the
		 * iodone list and wakes the raidio thread. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component read/write; completion
		 * will arrive via KernelWakeupFunc() (b_iodone). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/*
 * KernelWakeupFunc: the b_iodone callback associated with an I/O
 * invoked from kernel code (see rf_DispatchKernelIO/InitBP).
 *
 * Runs at I/O completion: records trace timing, marks the component
 * failed on error (if that would not break the set entirely), copies
 * the error into the request, and hands the request to the raidio
 * thread via the iodone queue.  All of this happens under
 * iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the originating request was stashed in b_private by InitBP */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2330
2331
2332 /*
2333 * initialize a buf structure for doing an I/O in the kernel.
2334 */
2335 static void
2336 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2337 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2338 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2339 struct proc *b_proc)
2340 {
2341 /* bp->b_flags = B_PHYS | rw_flag; */
2342 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2343 bp->b_oflags = 0;
2344 bp->b_cflags = 0;
2345 bp->b_bcount = numSect << logBytesPerSector;
2346 bp->b_bufsize = bp->b_bcount;
2347 bp->b_error = 0;
2348 bp->b_dev = dev;
2349 bp->b_data = bf;
2350 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2351 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2352 if (bp->b_bcount == 0) {
2353 panic("bp->b_bcount is zero in InitBP!!");
2354 }
2355 bp->b_proc = b_proc;
2356 bp->b_iodone = cbFunc;
2357 bp->b_private = cbArg;
2358 }
2359
2360 static void
2361 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2362 struct disklabel *lp)
2363 {
2364 memset(lp, 0, sizeof(*lp));
2365
2366 /* fabricate a label... */
2367 lp->d_secperunit = raidPtr->totalSectors;
2368 lp->d_secsize = raidPtr->bytesPerSector;
2369 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2370 lp->d_ntracks = 4 * raidPtr->numCol;
2371 lp->d_ncylinders = raidPtr->totalSectors /
2372 (lp->d_nsectors * lp->d_ntracks);
2373 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2374
2375 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2376 lp->d_type = DTYPE_RAID;
2377 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2378 lp->d_rpm = 3600;
2379 lp->d_interleave = 1;
2380 lp->d_flags = 0;
2381
2382 lp->d_partitions[RAW_PART].p_offset = 0;
2383 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2384 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2385 lp->d_npartitions = RAW_PART + 1;
2386
2387 lp->d_magic = DISKMAGIC;
2388 lp->d_magic2 = DISKMAGIC;
2389 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2390
2391 }
2392 /*
2393 * Read the disklabel from the raid device. If one is not present, fake one
2394 * up.
2395 */
2396 static void
2397 raidgetdisklabel(dev_t dev)
2398 {
2399 int unit = raidunit(dev);
2400 struct raid_softc *rs;
2401 const char *errstring;
2402 struct disklabel *lp;
2403 struct cpu_disklabel *clp;
2404 RF_Raid_t *raidPtr;
2405
2406 if ((rs = raidget(unit)) == NULL)
2407 return;
2408
2409 lp = rs->sc_dkdev.dk_label;
2410 clp = rs->sc_dkdev.dk_cpulabel;
2411
2412 db1_printf(("Getting the disklabel...\n"));
2413
2414 memset(clp, 0, sizeof(*clp));
2415
2416 raidPtr = &rs->sc_r;
2417
2418 raidgetdefaultlabel(raidPtr, rs, lp);
2419
2420 /*
2421 * Call the generic disklabel extraction routine.
2422 */
2423 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2424 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2425 if (errstring)
2426 raidmakedisklabel(rs);
2427 else {
2428 int i;
2429 struct partition *pp;
2430
2431 /*
2432 * Sanity check whether the found disklabel is valid.
2433 *
2434 * This is necessary since total size of the raid device
2435 * may vary when an interleave is changed even though exactly
2436 * same components are used, and old disklabel may used
2437 * if that is found.
2438 */
2439 if (lp->d_secperunit != rs->sc_size)
2440 printf("raid%d: WARNING: %s: "
2441 "total sector size in disklabel (%" PRIu32 ") != "
2442 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2443 lp->d_secperunit, rs->sc_size);
2444 for (i = 0; i < lp->d_npartitions; i++) {
2445 pp = &lp->d_partitions[i];
2446 if (pp->p_offset + pp->p_size > rs->sc_size)
2447 printf("raid%d: WARNING: %s: end of partition `%c' "
2448 "exceeds the size of raid (%" PRIu64 ")\n",
2449 unit, rs->sc_xname, 'a' + i, rs->sc_size);
2450 }
2451 }
2452
2453 }
2454 /*
2455 * Take care of things one might want to take care of in the event
2456 * that a disklabel isn't present.
2457 */
2458 static void
2459 raidmakedisklabel(struct raid_softc *rs)
2460 {
2461 struct disklabel *lp = rs->sc_dkdev.dk_label;
2462 db1_printf(("Making a label..\n"));
2463
2464 /*
2465 * For historical reasons, if there's no disklabel present
2466 * the raw partition must be marked FS_BSDFFS.
2467 */
2468
2469 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2470
2471 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2472
2473 lp->d_checksum = dkcksum(lp);
2474 }
2475 /*
2476 * Wait interruptibly for an exclusive lock.
2477 *
2478 * XXX
2479 * Several drivers do this; it should be abstracted and made MP-safe.
2480 * (Hmm... where have we seen this warning before :-> GO )
2481 */
2482 static int
2483 raidlock(struct raid_softc *rs)
2484 {
2485 int error;
2486
2487 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2488 rs->sc_flags |= RAIDF_WANTED;
2489 if ((error =
2490 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2491 return (error);
2492 }
2493 rs->sc_flags |= RAIDF_LOCKED;
2494 return (0);
2495 }
2496 /*
2497 * Unlock and wake up any waiters.
2498 */
2499 static void
2500 raidunlock(struct raid_softc *rs)
2501 {
2502
2503 rs->sc_flags &= ~RAIDF_LOCKED;
2504 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2505 rs->sc_flags &= ~RAIDF_WANTED;
2506 wakeup(rs);
2507 }
2508 }
2509
2510
2511 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2512 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2513 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2514
/*
 * rf_component_info_offset: byte offset of the component info area on
 * each component (currently a fixed constant).
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2521
2522 static daddr_t
2523 rf_component_info_size(unsigned secsize)
2524 {
2525 daddr_t info_size;
2526
2527 KASSERT(secsize);
2528 if (secsize > RF_COMPONENT_INFO_SIZE)
2529 info_size = secsize;
2530 else
2531 info_size = RF_COMPONENT_INFO_SIZE;
2532
2533 return info_size;
2534 }
2535
2536 static daddr_t
2537 rf_parity_map_offset(RF_Raid_t *raidPtr)
2538 {
2539 daddr_t map_offset;
2540
2541 KASSERT(raidPtr->bytesPerSector);
2542 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2543 map_offset = raidPtr->bytesPerSector;
2544 else
2545 map_offset = RF_COMPONENT_INFO_SIZE;
2546 map_offset += rf_component_info_offset();
2547
2548 return map_offset;
2549 }
2550
2551 static daddr_t
2552 rf_parity_map_size(RF_Raid_t *raidPtr)
2553 {
2554 daddr_t map_size;
2555
2556 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2557 map_size = raidPtr->bytesPerSector;
2558 else
2559 map_size = RF_PARITY_MAP_SIZE;
2560
2561 return map_size;
2562 }
2563
2564 int
2565 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2566 {
2567 RF_ComponentLabel_t *clabel;
2568
2569 clabel = raidget_component_label(raidPtr, col);
2570 clabel->clean = RF_RAID_CLEAN;
2571 raidflush_component_label(raidPtr, col);
2572 return(0);
2573 }
2574
2575
2576 int
2577 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2578 {
2579 RF_ComponentLabel_t *clabel;
2580
2581 clabel = raidget_component_label(raidPtr, col);
2582 clabel->clean = RF_RAID_DIRTY;
2583 raidflush_component_label(raidPtr, col);
2584 return(0);
2585 }
2586
/*
 * raidfetch_component_label: read the on-disk component label for
 * column col into the in-core copy (raid_cinfo[col].ci_label).
 * Returns the raidread_component_label() status.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2596
/*
 * raidget_component_label: return a pointer to the in-core component
 * label for column col (no I/O is performed).
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2602
2603 int
2604 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2605 {
2606 RF_ComponentLabel_t *label;
2607
2608 label = &raidPtr->raid_cinfo[col].ci_label;
2609 label->mod_counter = raidPtr->mod_counter;
2610 #ifndef RF_NO_PARITY_MAP
2611 label->parity_map_modcount = label->mod_counter;
2612 #endif
2613 return raidwrite_component_label(raidPtr->bytesPerSector,
2614 raidPtr->Disks[col].dev,
2615 raidPtr->raid_cinfo[col].ci_vp, label);
2616 }
2617
2618
/*
 * raidread_component_label: read a component label from the component
 * info area of the given device into *clabel.  Thin wrapper around
 * raidread_component_area().
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2628
2629 /* ARGSUSED */
2630 static int
2631 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2632 size_t msize, daddr_t offset, daddr_t dsize)
2633 {
2634 struct buf *bp;
2635 const struct bdevsw *bdev;
2636 int error;
2637
2638 /* XXX should probably ensure that we don't try to do this if
2639 someone has changed rf_protected_sectors. */
2640
2641 if (b_vp == NULL) {
2642 /* For whatever reason, this component is not valid.
2643 Don't try to read a component label from it. */
2644 return(EINVAL);
2645 }
2646
2647 /* get a block of the appropriate size... */
2648 bp = geteblk((int)dsize);
2649 bp->b_dev = dev;
2650
2651 /* get our ducks in a row for the read */
2652 bp->b_blkno = offset / DEV_BSIZE;
2653 bp->b_bcount = dsize;
2654 bp->b_flags |= B_READ;
2655 bp->b_resid = dsize;
2656
2657 bdev = bdevsw_lookup(bp->b_dev);
2658 if (bdev == NULL)
2659 return (ENXIO);
2660 (*bdev->d_strategy)(bp);
2661
2662 error = biowait(bp);
2663
2664 if (!error) {
2665 memcpy(data, bp->b_data, msize);
2666 }
2667
2668 brelse(bp, 0);
2669 return(error);
2670 }
2671
2672
/*
 * raidwrite_component_label: synchronously write *clabel to the
 * component info area of the given device.  Thin wrapper around
 * raidwrite_component_area().
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2682
2683 /* ARGSUSED */
2684 static int
2685 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2686 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2687 {
2688 struct buf *bp;
2689 const struct bdevsw *bdev;
2690 int error;
2691
2692 /* get a block of the appropriate size... */
2693 bp = geteblk((int)dsize);
2694 bp->b_dev = dev;
2695
2696 /* get our ducks in a row for the write */
2697 bp->b_blkno = offset / DEV_BSIZE;
2698 bp->b_bcount = dsize;
2699 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2700 bp->b_resid = dsize;
2701
2702 memset(bp->b_data, 0, dsize);
2703 memcpy(bp->b_data, data, msize);
2704
2705 bdev = bdevsw_lookup(bp->b_dev);
2706 if (bdev == NULL)
2707 return (ENXIO);
2708 (*bdev->d_strategy)(bp);
2709 if (asyncp)
2710 return 0;
2711 error = biowait(bp);
2712 brelse(bp, 0);
2713 if (error) {
2714 #if 1
2715 printf("Failed to write RAID component info!\n");
2716 #endif
2717 }
2718
2719 return(error);
2720 }
2721
2722 void
2723 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2724 {
2725 int c;
2726
2727 for (c = 0; c < raidPtr->numCol; c++) {
2728 /* Skip dead disks. */
2729 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2730 continue;
2731 /* XXXjld: what if an error occurs here? */
2732 raidwrite_component_area(raidPtr->Disks[c].dev,
2733 raidPtr->raid_cinfo[c].ci_vp, map,
2734 RF_PARITYMAP_NBYTE,
2735 rf_parity_map_offset(raidPtr),
2736 rf_parity_map_size(raidPtr), 0);
2737 }
2738 }
2739
2740 void
2741 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2742 {
2743 struct rf_paritymap_ondisk tmp;
2744 int c,first;
2745
2746 first=1;
2747 for (c = 0; c < raidPtr->numCol; c++) {
2748 /* Skip dead disks. */
2749 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2750 continue;
2751 raidread_component_area(raidPtr->Disks[c].dev,
2752 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2753 RF_PARITYMAP_NBYTE,
2754 rf_parity_map_offset(raidPtr),
2755 rf_parity_map_size(raidPtr));
2756 if (first) {
2757 memcpy(map, &tmp, sizeof(*map));
2758 first = 0;
2759 } else {
2760 rf_paritymap_merge(map, &tmp);
2761 }
2762 }
2763 }
2764
/*
 * rf_markalldirty: bump the set's mod_counter and mark every usable
 * component (and in-use spare) dirty on disk.  Called when the set
 * goes live so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column claims this spare,
			   scol keeps its previous value (initially -1) —
			   confirm that cannot happen for a used spare. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2824
2825
/*
 * rf_update_component_labels: bump the set's mod_counter and rewrite
 * the component labels of all optimal components and in-use spares.
 * When final is RF_FINAL_COMPONENT_UPDATE and parity is known good,
 * the labels are additionally marked clean (normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column claims this spare,
			   scol stays at its previous value (initially -1) —
			   confirm that cannot happen for a used spare. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2900
2901 void
2902 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2903 {
2904
2905 if (vp != NULL) {
2906 if (auto_configured == 1) {
2907 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2908 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2909 vput(vp);
2910
2911 } else {
2912 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2913 }
2914 }
2915 }
2916
2917
2918 void
2919 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2920 {
2921 int r,c;
2922 struct vnode *vp;
2923 int acd;
2924
2925
2926 /* We take this opportunity to close the vnodes like we should.. */
2927
2928 for (c = 0; c < raidPtr->numCol; c++) {
2929 vp = raidPtr->raid_cinfo[c].ci_vp;
2930 acd = raidPtr->Disks[c].auto_configured;
2931 rf_close_component(raidPtr, vp, acd);
2932 raidPtr->raid_cinfo[c].ci_vp = NULL;
2933 raidPtr->Disks[c].auto_configured = 0;
2934 }
2935
2936 for (r = 0; r < raidPtr->numSpare; r++) {
2937 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2938 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2939 rf_close_component(raidPtr, vp, acd);
2940 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2941 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2942 }
2943 }
2944
2945
2946 void
2947 rf_ReconThread(struct rf_recon_req *req)
2948 {
2949 int s;
2950 RF_Raid_t *raidPtr;
2951
2952 s = splbio();
2953 raidPtr = (RF_Raid_t *) req->raidPtr;
2954 raidPtr->recon_in_progress = 1;
2955
2956 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2957 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2958
2959 RF_Free(req, sizeof(*req));
2960
2961 raidPtr->recon_in_progress = 0;
2962 splx(s);
2963
2964 /* That's all... */
2965 kthread_exit(0); /* does not return */
2966 }
2967
2968 void
2969 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2970 {
2971 int retcode;
2972 int s;
2973
2974 raidPtr->parity_rewrite_stripes_done = 0;
2975 raidPtr->parity_rewrite_in_progress = 1;
2976 s = splbio();
2977 retcode = rf_RewriteParity(raidPtr);
2978 splx(s);
2979 if (retcode) {
2980 printf("raid%d: Error re-writing parity (%d)!\n",
2981 raidPtr->raidid, retcode);
2982 } else {
2983 /* set the clean bit! If we shutdown correctly,
2984 the clean bit on each component label will get
2985 set */
2986 raidPtr->parity_good = RF_RAID_CLEAN;
2987 }
2988 raidPtr->parity_rewrite_in_progress = 0;
2989
2990 /* Anyone waiting for us to stop? If so, inform them... */
2991 if (raidPtr->waitShutdown) {
2992 wakeup(&raidPtr->parity_rewrite_in_progress);
2993 }
2994
2995 /* That's all... */
2996 kthread_exit(0); /* does not return */
2997 }
2998
2999
3000 void
3001 rf_CopybackThread(RF_Raid_t *raidPtr)
3002 {
3003 int s;
3004
3005 raidPtr->copyback_in_progress = 1;
3006 s = splbio();
3007 rf_CopybackReconstructedData(raidPtr);
3008 splx(s);
3009 raidPtr->copyback_in_progress = 0;
3010
3011 /* That's all... */
3012 kthread_exit(0); /* does not return */
3013 }
3014
3015
3016 void
3017 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3018 {
3019 int s;
3020 RF_Raid_t *raidPtr;
3021
3022 s = splbio();
3023 raidPtr = req->raidPtr;
3024 raidPtr->recon_in_progress = 1;
3025 rf_ReconstructInPlace(raidPtr, req->col);
3026 RF_Free(req, sizeof(*req));
3027 raidPtr->recon_in_progress = 0;
3028 splx(s);
3029
3030 /* That's all... */
3031 kthread_exit(0); /* does not return */
3032 }
3033
/*
 * rf_get_component: read the component label from (dev, vp) and, if
 * it looks reasonable, prepend a new RF_AutoConfig_t for it to
 * ac_list.  On rejection the vnode is closed and released.  On memory
 * exhaustion the entire ac_list is freed and NULL is returned.
 * Returns the (possibly extended) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: tear down everything collected so
		       far (labels and list nodes) */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list node takes over the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: rejected component, release label and vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3091
/*
 * rf_find_raid_components: walk every disk device in the system
 * looking for RAIDframe components — in wedges, in FS_RAID disklabel
 * partitions, or (failing both) on the raw partition — and return a
 * list of candidate components for auto-configuration.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their own type; check it for
			   RAIDframe and hand the whole wedge over */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3273
3274
3275 int
3276 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3277 {
3278
3279 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3280 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3281 ((clabel->clean == RF_RAID_CLEAN) ||
3282 (clabel->clean == RF_RAID_DIRTY)) &&
3283 clabel->row >=0 &&
3284 clabel->column >= 0 &&
3285 clabel->num_rows > 0 &&
3286 clabel->num_columns > 0 &&
3287 clabel->row < clabel->num_rows &&
3288 clabel->column < clabel->num_columns &&
3289 clabel->blockSize > 0 &&
3290 /*
3291 * numBlocksHi may contain garbage, but it is ok since
3292 * the type is unsigned. If it is really garbage,
3293 * rf_fix_old_label_size() will fix it.
3294 */
3295 rf_component_label_numblocks(clabel) > 0) {
3296 /*
3297 * label looks reasonable enough...
3298 * let's make sure it has no old garbage.
3299 */
3300 if (numsecs)
3301 rf_fix_old_label_size(clabel, numsecs);
3302 return(1);
3303 }
3304 return(0);
3305 }
3306
3307
3308 /*
3309 * For reasons yet unknown, some old component labels have garbage in
3310 * the newer numBlocksHi region, and this causes lossage. Since those
3311 * disks will also have numsecs set to less than 32 bits of sectors,
3312 * we can determine when this corruption has occurred, and fix it.
3313 *
3314 * The exact same problem, with the same unknown reason, happens to
3315 * the partitionSizeHi member as well.
3316 */
3317 static void
3318 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3319 {
3320
3321 if (numsecs < ((uint64_t)1 << 32)) {
3322 if (clabel->numBlocksHi) {
3323 printf("WARNING: total sectors < 32 bits, yet "
3324 "numBlocksHi set\n"
3325 "WARNING: resetting numBlocksHi to zero.\n");
3326 clabel->numBlocksHi = 0;
3327 }
3328
3329 if (clabel->partitionSizeHi) {
3330 printf("WARNING: total sectors < 32 bits, yet "
3331 "partitionSizeHi set\n"
3332 "WARNING: resetting partitionSizeHi to zero.\n");
3333 clabel->partitionSizeHi = 0;
3334 }
3335 }
3336 }
3337
3338
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.
 * Debug-only helper; compiled in only when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* indexed by root_partition; masked with 3 below to stay in range */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3372
3373 RF_ConfigSet_t *
3374 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3375 {
3376 RF_AutoConfig_t *ac;
3377 RF_ConfigSet_t *config_sets;
3378 RF_ConfigSet_t *cset;
3379 RF_AutoConfig_t *ac_next;
3380
3381
3382 config_sets = NULL;
3383
3384 /* Go through the AutoConfig list, and figure out which components
3385 belong to what sets. */
3386 ac = ac_list;
3387 while(ac!=NULL) {
3388 /* we're going to putz with ac->next, so save it here
3389 for use at the end of the loop */
3390 ac_next = ac->next;
3391
3392 if (config_sets == NULL) {
3393 /* will need at least this one... */
3394 config_sets = (RF_ConfigSet_t *)
3395 malloc(sizeof(RF_ConfigSet_t),
3396 M_RAIDFRAME, M_NOWAIT);
3397 if (config_sets == NULL) {
3398 panic("rf_create_auto_sets: No memory!");
3399 }
3400 /* this one is easy :) */
3401 config_sets->ac = ac;
3402 config_sets->next = NULL;
3403 config_sets->rootable = 0;
3404 ac->next = NULL;
3405 } else {
3406 /* which set does this component fit into? */
3407 cset = config_sets;
3408 while(cset!=NULL) {
3409 if (rf_does_it_fit(cset, ac)) {
3410 /* looks like it matches... */
3411 ac->next = cset->ac;
3412 cset->ac = ac;
3413 break;
3414 }
3415 cset = cset->next;
3416 }
3417 if (cset==NULL) {
3418 /* didn't find a match above... new set..*/
3419 cset = (RF_ConfigSet_t *)
3420 malloc(sizeof(RF_ConfigSet_t),
3421 M_RAIDFRAME, M_NOWAIT);
3422 if (cset == NULL) {
3423 panic("rf_create_auto_sets: No memory!");
3424 }
3425 cset->ac = ac;
3426 ac->next = NULL;
3427 cset->next = config_sets;
3428 cset->rootable = 0;
3429 config_sets = cset;
3430 }
3431 }
3432 ac = ac_next;
3433 }
3434
3435
3436 return(config_sets);
3437 }
3438
3439 static int
3440 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3441 {
3442 RF_ComponentLabel_t *clabel1, *clabel2;
3443
3444 /* If this one matches the *first* one in the set, that's good
3445 enough, since the other members of the set would have been
3446 through here too... */
3447 /* note that we are not checking partitionSize here..
3448
3449 Note that we are also not checking the mod_counters here.
3450 If everything else matches except the mod_counter, that's
3451 good enough for this test. We will deal with the mod_counters
3452 a little later in the autoconfiguration process.
3453
3454 (clabel1->mod_counter == clabel2->mod_counter) &&
3455
3456 The reason we don't check for this is that failed disks
3457 will have lower modification counts. If those disks are
3458 not added to the set they used to belong to, then they will
3459 form their own set, which may result in 2 different sets,
3460 for example, competing to be configured at raid0, and
3461 perhaps competing to be the root filesystem set. If the
3462 wrong ones get configured, or both attempt to become /,
3463 weird behaviour and or serious lossage will occur. Thus we
3464 need to bring them into the fold here, and kick them out at
3465 a later point.
3466
3467 */
3468
3469 clabel1 = cset->ac->clabel;
3470 clabel2 = ac->clabel;
3471 if ((clabel1->version == clabel2->version) &&
3472 (clabel1->serial_number == clabel2->serial_number) &&
3473 (clabel1->num_rows == clabel2->num_rows) &&
3474 (clabel1->num_columns == clabel2->num_columns) &&
3475 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3476 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3477 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3478 (clabel1->parityConfig == clabel2->parityConfig) &&
3479 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3480 (clabel1->blockSize == clabel2->blockSize) &&
3481 rf_component_label_numblocks(clabel1) ==
3482 rf_component_label_numblocks(clabel2) &&
3483 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3484 (clabel1->root_partition == clabel2->root_partition) &&
3485 (clabel1->last_unit == clabel2->last_unit) &&
3486 (clabel1->config_order == clabel2->config_order)) {
3487 /* if it get's here, it almost *has* to be a match */
3488 } else {
3489 /* it's not consistent with somebody in the set..
3490 punt */
3491 return(0);
3492 }
3493 /* all was fine.. it must fit... */
3494 return(1);
3495 }
3496
/*
 * Decide whether this configuration set has enough live components to
 * be configured.  Returns 1 if the set is viable, 0 otherwise.
 * Components whose mod_counter is stale (lower than the set maximum)
 * are treated as missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				/* failed disks have lower mod_counters;
				   the highest value wins */
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each column, look for a component that is both in this
	   column and up-to-date (mod_counter matches the set maximum) */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did the odd component of a mirror
			   pair, and we didn't bail.. reset the
			   even_pair_failed flag, and go on to the
			   next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3599
3600 void
3601 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3602 RF_Raid_t *raidPtr)
3603 {
3604 RF_ComponentLabel_t *clabel;
3605 int i;
3606
3607 clabel = ac->clabel;
3608
3609 /* 1. Fill in the common stuff */
3610 config->numRow = clabel->num_rows = 1;
3611 config->numCol = clabel->num_columns;
3612 config->numSpare = 0; /* XXX should this be set here? */
3613 config->sectPerSU = clabel->sectPerSU;
3614 config->SUsPerPU = clabel->SUsPerPU;
3615 config->SUsPerRU = clabel->SUsPerRU;
3616 config->parityConfig = clabel->parityConfig;
3617 /* XXX... */
3618 strcpy(config->diskQueueType,"fifo");
3619 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3620 config->layoutSpecificSize = 0; /* XXX ?? */
3621
3622 while(ac!=NULL) {
3623 /* row/col values will be in range due to the checks
3624 in reasonable_label() */
3625 strcpy(config->devnames[0][ac->clabel->column],
3626 ac->devname);
3627 ac = ac->next;
3628 }
3629
3630 for(i=0;i<RF_MAXDBGV;i++) {
3631 config->debugVars[i][0] = 0;
3632 }
3633 }
3634
3635 int
3636 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3637 {
3638 RF_ComponentLabel_t *clabel;
3639 int column;
3640 int sparecol;
3641
3642 raidPtr->autoconfigure = new_value;
3643
3644 for(column=0; column<raidPtr->numCol; column++) {
3645 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3646 clabel = raidget_component_label(raidPtr, column);
3647 clabel->autoconfigure = new_value;
3648 raidflush_component_label(raidPtr, column);
3649 }
3650 }
3651 for(column = 0; column < raidPtr->numSpare ; column++) {
3652 sparecol = raidPtr->numCol + column;
3653 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3654 clabel = raidget_component_label(raidPtr, sparecol);
3655 clabel->autoconfigure = new_value;
3656 raidflush_component_label(raidPtr, sparecol);
3657 }
3658 }
3659 return(new_value);
3660 }
3661
3662 int
3663 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3664 {
3665 RF_ComponentLabel_t *clabel;
3666 int column;
3667 int sparecol;
3668
3669 raidPtr->root_partition = new_value;
3670 for(column=0; column<raidPtr->numCol; column++) {
3671 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3672 clabel = raidget_component_label(raidPtr, column);
3673 clabel->root_partition = new_value;
3674 raidflush_component_label(raidPtr, column);
3675 }
3676 }
3677 for(column = 0; column < raidPtr->numSpare ; column++) {
3678 sparecol = raidPtr->numCol + column;
3679 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3680 clabel = raidget_component_label(raidPtr, sparecol);
3681 clabel->root_partition = new_value;
3682 raidflush_component_label(raidPtr, sparecol);
3683 }
3684 }
3685 return(new_value);
3686 }
3687
3688 void
3689 rf_release_all_vps(RF_ConfigSet_t *cset)
3690 {
3691 RF_AutoConfig_t *ac;
3692
3693 ac = cset->ac;
3694 while(ac!=NULL) {
3695 /* Close the vp, and give it back */
3696 if (ac->vp) {
3697 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3698 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3699 vput(ac->vp);
3700 ac->vp = NULL;
3701 }
3702 ac = ac->next;
3703 }
3704 }
3705
3706
3707 void
3708 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3709 {
3710 RF_AutoConfig_t *ac;
3711 RF_AutoConfig_t *next_ac;
3712
3713 ac = cset->ac;
3714 while(ac!=NULL) {
3715 next_ac = ac->next;
3716 /* nuke the label */
3717 free(ac->clabel, M_RAIDFRAME);
3718 /* cleanup the config structure */
3719 free(ac, M_RAIDFRAME);
3720 /* "next.." */
3721 ac = next_ac;
3722 }
3723 /* and, finally, nuke the config set */
3724 free(cset, M_RAIDFRAME);
3725 }
3726
3727
/*
 * Initialize a component label from the current state of the RAID set.
 * Fills in the set-wide fields (version, geometry, size, policy);
 * column-specific fields (row/column) are set by the caller.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* only single-row sets are supported */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3760
/*
 * Auto-configure one config set: build an RF_Config_t from the
 * component labels, pick a unit number (preferring the unit the set
 * was last configured as), and configure the RAID set.  Returns the
 * softc on success, NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk forward from last_unit until an unconfigured unit is found */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; release the unit we claimed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3834
3835 void
3836 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3837 {
3838 struct buf *bp;
3839 struct raid_softc *rs;
3840
3841 bp = (struct buf *)desc->bp;
3842 rs = desc->raidPtr->softc;
3843 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3844 (bp->b_flags & B_READ));
3845 }
3846
/*
 * Initialize a pool at IPL_BIO, pre-allocate xmin items, and set the
 * low/high watermarks so at least xmin items stay cached and at most
 * xmax are kept.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3856
3857 /*
3858 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3859 * if there is IO pending and if that IO could possibly be done for a
3860 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3861 * otherwise.
3862 *
3863 */
3864
3865 int
3866 rf_buf_queue_check(RF_Raid_t *raidPtr)
3867 {
3868 struct raid_softc *rs = raidPtr->softc;
3869 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3870 /* there is work to do */
3871 return 0;
3872 }
3873 /* default is nothing to do */
3874 return 1;
3875 }
3876
3877 int
3878 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3879 {
3880 uint64_t numsecs;
3881 unsigned secsize;
3882 int error;
3883
3884 error = getdisksize(vp, &numsecs, &secsize);
3885 if (error == 0) {
3886 diskPtr->blockSize = secsize;
3887 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3888 diskPtr->partitionSize = numsecs;
3889 return 0;
3890 }
3891 return error;
3892 }
3893
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3899
/*
 * Autoconf attach function: intentionally empty; real initialization
 * happens when a RAID set is configured.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3905
3906
3907 static int
3908 raid_detach(device_t self, int flags)
3909 {
3910 int error;
3911 struct raid_softc *rs = raidget(device_unit(self));
3912
3913 if (rs == NULL)
3914 return ENXIO;
3915
3916 if ((error = raidlock(rs)) != 0)
3917 return (error);
3918
3919 error = raid_detach_unlocked(rs);
3920
3921 raidunlock(rs);
3922
3923 /* XXXkd: raidput(rs) ??? */
3924
3925 return error;
3926 }
3927
/*
 * Fill in a synthetic disk geometry for the RAID set and register it
 * with the disk subsystem.  RAID sets have no physical geometry, so
 * the track/sector figures are fabricated from the layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/* fabricated: one "track" holds a stripe's worth of data sectors */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3942
3943 /*
3944 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3945 * We end up returning whatever error was returned by the first cache flush
3946 * that fails.
3947 */
3948
3949 int
3950 rf_sync_component_caches(RF_Raid_t *raidPtr)
3951 {
3952 int c, sparecol;
3953 int e,error;
3954 int force = 1;
3955
3956 error = 0;
3957 for (c = 0; c < raidPtr->numCol; c++) {
3958 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3959 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3960 &force, FWRITE, NOCRED);
3961 if (e) {
3962 if (e != ENODEV)
3963 printf("raid%d: cache flush to component %s failed.\n",
3964 raidPtr->raidid, raidPtr->Disks[c].devname);
3965 if (error == 0) {
3966 error = e;
3967 }
3968 }
3969 }
3970 }
3971
3972 for( c = 0; c < raidPtr->numSpare ; c++) {
3973 sparecol = raidPtr->numCol + c;
3974 /* Need to ensure that the reconstruct actually completed! */
3975 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3976 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3977 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3978 if (e) {
3979 if (e != ENODEV)
3980 printf("raid%d: cache flush to component %s failed.\n",
3981 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3982 if (error == 0) {
3983 error = e;
3984 }
3985 }
3986 }
3987 }
3988 return error;
3989 }
3990