/*	$NetBSD: rf_netbsdkintf.c,v 1.312 2014/07/25 08:10:38 dholland Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.312 2014/07/25 08:10:38 dholland Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/* Block-device switch: raid units behave as ordinary disks (D_DISK). */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
217
/* Character-device (raw) switch; unsupported operations use the no-op stubs. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
232
/* Glue for the generic disk(9) layer: strategy entry point and minphys hook. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/* Per-unit software state for a RAIDframe device. */
struct raid_softc {
	device_t sc_dev;	/* autoconf(9) device handle */
	int     sc_unit;	/* raid unit number (key in `raids' list) */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global `raids' list */
};
247 /* sc_flags */
248 #define RAIDF_INITED 0x01 /* unit has been initialized */
249 #define RAIDF_WLABEL 0x02 /* label area is writable */
250 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
251 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /*
263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
264 * Be aware that large numbers can allow the driver to consume a lot of
265 * kernel memory, especially on writes, and in degraded mode reads.
266 *
267 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
268 * a single 64K write will typically require 64K for the old data,
269 * 64K for the old parity, and 64K for the new parity, for a total
270 * of 192K (if the parity buffer is not re-used immediately).
271 * Even it if is used immediately, that's still 128K, which when multiplied
272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
273 *
274 * Now in degraded mode, for example, a 64K read on the above setup may
275 * require data reconstruction, which will require *all* of the 4 remaining
276 * disks to participate -- 4 * 32K/disk == 128K again.
277 */
278
279 #ifndef RAIDOUTSTANDING
280 #define RAIDOUTSTANDING 6
281 #endif
282
283 #define RAIDLABELDEV(dev) \
284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
285
286 /* declared here, and made public, for the benefit of KVM stuff.. */
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 if (sc == NULL) {
342 #ifdef DIAGNOSTIC
343 printf("%s: out of memory\n", __func__);
344 #endif
345 return NULL;
346 }
347 sc->sc_unit = unit;
348 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
349 return sc;
350 }
351
/*
 * Release a softc created by raidcreate(): free the buffer queue
 * first, then the softc itself.  The caller must already have removed
 * sc from the global `raids' list (or never inserted it).
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
357
358 static struct raid_softc *
359 raidget(int unit) {
360 struct raid_softc *sc;
361 if (unit < 0) {
362 #ifdef DIAGNOSTIC
363 panic("%s: unit %d!", __func__, unit);
364 #endif
365 return NULL;
366 }
367 mutex_enter(&raid_lock);
368 LIST_FOREACH(sc, &raids, sc_link) {
369 if (sc->sc_unit == unit) {
370 mutex_exit(&raid_lock);
371 return sc;
372 }
373 }
374 mutex_exit(&raid_lock);
375 if ((sc = raidcreate(unit)) == NULL)
376 return NULL;
377 mutex_enter(&raid_lock);
378 LIST_INSERT_HEAD(&raids, sc, sc_link);
379 mutex_exit(&raid_lock);
380 return sc;
381 }
382
/*
 * Unlink sc from the global `raids' list and destroy it.  The caller
 * must guarantee no other references to sc remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
390
/*
 * One-time driver initialization, called from autoconf with the
 * number of configured units (`num' is unused here; units are created
 * lazily by raidget()).  Boots the RAIDframe core, attaches the
 * cfattach, and registers a config finalizer so auto-configuration
 * runs after all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization state for spare-table installation requests. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* A failure here means core RAIDframe state is unusable; give up. */
	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
423
424 int
425 rf_autoconfig(device_t self)
426 {
427 RF_AutoConfig_t *ac_list;
428 RF_ConfigSet_t *config_sets;
429
430 if (!raidautoconfig || raidautoconfigdone == true)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfigdone = true;
435
436 #ifdef __HAVE_CPU_BOOTCONF
437 /*
438 * 0. find the boot device if needed first so we can use it later
439 * this needs to be done before we autoconfigure any raid sets,
440 * because if we use wedges we are not going to be able to open
441 * the boot device later
442 */
443 if (booted_device == NULL)
444 cpu_bootconf();
445 #endif
446 /* 1. locate all RAID components on the system */
447 aprint_debug("Searching for RAID components...\n");
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set and configure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return 1;
460 }
461
462 static int
463 rf_containsboot(RF_Raid_t *r, device_t bdv) {
464 const char *bootname = device_xname(bdv);
465 size_t len = strlen(bootname);
466
467 for (int col = 0; col < r->numCol; col++) {
468 const char *devname = r->Disks[col].devname;
469 devname += sizeof("/dev/") - 1;
470 if (strncmp(devname, "dk", 2) == 0) {
471 const char *parent =
472 dkwedge_get_parent_name(r->Disks[col].dev);
473 if (parent != NULL)
474 devname = parent;
475 }
476 if (strncmp(devname, bootname, len) == 0) {
477 struct raid_softc *sc = r->softc;
478 aprint_debug("raid%d includes boot device %s\n",
479 sc->sc_unit, devname);
480 return 1;
481 }
482 }
483 return 0;
484 }
485
/*
 * Walk the auto-detected configuration sets: configure those with
 * enough components and autoconfigure enabled, release the rest.
 * If exactly one configured set is marked rootable, make it the boot
 * device (unless the user hardwired root via `rootspec').
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of rootable sets configured */
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; save the link first. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* Override root when firmware gave us nothing, when the
		   label explicitly claims root, or when the set contains
		   the boot device. */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Narrow the candidates to sets containing the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
596
597
598 int
599 raidsize(dev_t dev)
600 {
601 struct raid_softc *rs;
602 struct disklabel *lp;
603 int part, unit, omask, size;
604
605 unit = raidunit(dev);
606 if ((rs = raidget(unit)) == NULL)
607 return -1;
608 if ((rs->sc_flags & RAIDF_INITED) == 0)
609 return (-1);
610
611 part = DISKPART(dev);
612 omask = rs->sc_dkdev.dk_openmask & (1 << part);
613 lp = rs->sc_dkdev.dk_label;
614
615 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
616 return (-1);
617
618 if (lp->d_partitions[part].p_fstype != FS_SWAP)
619 size = -1;
620 else
621 size = lp->d_partitions[part].p_size *
622 (lp->d_secsize / DEV_BSIZE);
623
624 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
625 return (-1);
626
627 return (size);
628
629 }
630
/*
 * Crash-dump entry point.  Only RAID 1 sets are supported: pick one
 * live (or spared) component and forward the dump to its underlying
 * block device, offset into the component's data region.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dumps must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Map the spare back to the column it replaces. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
767 /* ARGSUSED */
/*
 * Open entry point (block and character).  Validates the partition,
 * reads the disklabel on first open, records the open in the per-mode
 * open masks, and marks all components dirty on the first open of an
 * initialized unit.  Runs under the per-unit raidlock().
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* A unit being torn down cannot be reopened. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of an initialized unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
851 /* ARGSUSED */
/*
 * Close entry point.  Clears the partition's bit in the per-mode open
 * mask; on last close of an initialized unit, writes out the final
 * (clean) component labels.  Runs under the per-unit raidlock().
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
900
/*
 * Strategy entry point: validate and bounds-check bp, then queue it on
 * the unit's buffer queue and wake the I/O thread via iodone_cv.  On
 * any error, bp is completed immediately with b_error set.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfers complete trivially (b_error stays 0). */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (device sectors) to DEV_BSIZE units. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
971 /* ARGSUSED */
972 int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987 /* ARGSUSED */
988 int
989 raidwrite(dev_t dev, struct uio *uio, int flags)
990 {
991 int unit = raidunit(dev);
992 struct raid_softc *rs;
993
994 if ((rs = raidget(unit)) == NULL)
995 return ENXIO;
996
997 if ((rs->sc_flags & RAIDF_INITED) == 0)
998 return (ENXIO);
999
1000 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1001
1002 }
1003
/*
 * Tear down a unit: shut down the RAIDframe engine (if initialized)
 * and detach/destroy the generic disk.  Returns EBUSY if any
 * partition is still open, or the error from rf_Shutdown().  Caller
 * holds the per-unit lock (hence "unlocked" = lock not taken here).
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1035
1036 int
1037 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1038 {
1039 int unit = raidunit(dev);
1040 int error = 0;
1041 int part, pmask, s;
1042 cfdata_t cf;
1043 struct raid_softc *rs;
1044 RF_Config_t *k_cfg, *u_cfg;
1045 RF_Raid_t *raidPtr;
1046 RF_RaidDisk_t *diskPtr;
1047 RF_AccTotals_t *totals;
1048 RF_DeviceConfig_t *d_cfg, **ucfgp;
1049 u_char *specific_buf;
1050 int retcode = 0;
1051 int column;
1052 /* int raidid; */
1053 struct rf_recon_req *rrcopy, *rr;
1054 RF_ComponentLabel_t *clabel;
1055 RF_ComponentLabel_t *ci_label;
1056 RF_ComponentLabel_t **clabel_ptr;
1057 RF_SingleComponent_t *sparePtr,*componentPtr;
1058 RF_SingleComponent_t component;
1059 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1060 int i, j, d;
1061 #ifdef __HAVE_OLD_DISKLABEL
1062 struct disklabel newlabel;
1063 #endif
1064 struct dkwedge_info *dkw;
1065
1066 if ((rs = raidget(unit)) == NULL)
1067 return ENXIO;
1068 raidPtr = &rs->sc_r;
1069
1070 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1071 (int) DISKPART(dev), (int) unit, cmd));
1072
1073 /* Must be open for writes for these commands... */
1074 switch (cmd) {
1075 #ifdef DIOCGSECTORSIZE
1076 case DIOCGSECTORSIZE:
1077 *(u_int *)data = raidPtr->bytesPerSector;
1078 return 0;
1079 case DIOCGMEDIASIZE:
1080 *(off_t *)data =
1081 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1082 return 0;
1083 #endif
1084 case DIOCSDINFO:
1085 case DIOCWDINFO:
1086 #ifdef __HAVE_OLD_DISKLABEL
1087 case ODIOCWDINFO:
1088 case ODIOCSDINFO:
1089 #endif
1090 case DIOCWLABEL:
1091 case DIOCAWEDGE:
1092 case DIOCDWEDGE:
1093 case DIOCSSTRATEGY:
1094 if ((flag & FWRITE) == 0)
1095 return (EBADF);
1096 }
1097
1098 /* Must be initialized for these... */
1099 switch (cmd) {
1100 case DIOCGDINFO:
1101 case DIOCSDINFO:
1102 case DIOCWDINFO:
1103 #ifdef __HAVE_OLD_DISKLABEL
1104 case ODIOCGDINFO:
1105 case ODIOCWDINFO:
1106 case ODIOCSDINFO:
1107 case ODIOCGDEFLABEL:
1108 #endif
1109 case DIOCGPART:
1110 case DIOCWLABEL:
1111 case DIOCGDEFLABEL:
1112 case DIOCAWEDGE:
1113 case DIOCDWEDGE:
1114 case DIOCLWEDGES:
1115 case DIOCCACHESYNC:
1116 case RAIDFRAME_SHUTDOWN:
1117 case RAIDFRAME_REWRITEPARITY:
1118 case RAIDFRAME_GET_INFO:
1119 case RAIDFRAME_RESET_ACCTOTALS:
1120 case RAIDFRAME_GET_ACCTOTALS:
1121 case RAIDFRAME_KEEP_ACCTOTALS:
1122 case RAIDFRAME_GET_SIZE:
1123 case RAIDFRAME_FAIL_DISK:
1124 case RAIDFRAME_COPYBACK:
1125 case RAIDFRAME_CHECK_RECON_STATUS:
1126 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1127 case RAIDFRAME_GET_COMPONENT_LABEL:
1128 case RAIDFRAME_SET_COMPONENT_LABEL:
1129 case RAIDFRAME_ADD_HOT_SPARE:
1130 case RAIDFRAME_REMOVE_HOT_SPARE:
1131 case RAIDFRAME_INIT_LABELS:
1132 case RAIDFRAME_REBUILD_IN_PLACE:
1133 case RAIDFRAME_CHECK_PARITY:
1134 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1135 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1136 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1137 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1138 case RAIDFRAME_SET_AUTOCONFIG:
1139 case RAIDFRAME_SET_ROOT:
1140 case RAIDFRAME_DELETE_COMPONENT:
1141 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1142 case RAIDFRAME_PARITYMAP_STATUS:
1143 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1144 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1145 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1146 case DIOCGSTRATEGY:
1147 case DIOCSSTRATEGY:
1148 if ((rs->sc_flags & RAIDF_INITED) == 0)
1149 return (ENXIO);
1150 }
1151
1152 switch (cmd) {
1153 #ifdef COMPAT_50
1154 case RAIDFRAME_GET_INFO50:
1155 return rf_get_info50(raidPtr, data);
1156
1157 case RAIDFRAME_CONFIGURE50:
1158 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1159 return retcode;
1160 goto config;
1161 #endif
1162 /* configure the system */
1163 case RAIDFRAME_CONFIGURE:
1164
1165 if (raidPtr->valid) {
1166 /* There is a valid RAID set running on this unit! */
1167 printf("raid%d: Device already configured!\n",unit);
1168 return(EINVAL);
1169 }
1170
1171 /* copy-in the configuration information */
1172 /* data points to a pointer to the configuration structure */
1173
1174 u_cfg = *((RF_Config_t **) data);
1175 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1176 if (k_cfg == NULL) {
1177 return (ENOMEM);
1178 }
1179 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1180 if (retcode) {
1181 RF_Free(k_cfg, sizeof(RF_Config_t));
1182 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1183 retcode));
1184 return (retcode);
1185 }
1186 goto config;
1187 config:
1188 /* allocate a buffer for the layout-specific data, and copy it
1189 * in */
1190 if (k_cfg->layoutSpecificSize) {
1191 if (k_cfg->layoutSpecificSize > 10000) {
1192 /* sanity check */
1193 RF_Free(k_cfg, sizeof(RF_Config_t));
1194 return (EINVAL);
1195 }
1196 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1197 (u_char *));
1198 if (specific_buf == NULL) {
1199 RF_Free(k_cfg, sizeof(RF_Config_t));
1200 return (ENOMEM);
1201 }
1202 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1203 k_cfg->layoutSpecificSize);
1204 if (retcode) {
1205 RF_Free(k_cfg, sizeof(RF_Config_t));
1206 RF_Free(specific_buf,
1207 k_cfg->layoutSpecificSize);
1208 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1209 retcode));
1210 return (retcode);
1211 }
1212 } else
1213 specific_buf = NULL;
1214 k_cfg->layoutSpecific = specific_buf;
1215
1216 /* should do some kind of sanity check on the configuration.
1217 * Store the sum of all the bytes in the last byte? */
1218
1219 /* configure the system */
1220
1221 /*
1222 * Clear the entire RAID descriptor, just to make sure
1223 * there is no stale data left in the case of a
1224 * reconfiguration
1225 */
1226 memset(raidPtr, 0, sizeof(*raidPtr));
1227 raidPtr->softc = rs;
1228 raidPtr->raidid = unit;
1229
1230 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1231
1232 if (retcode == 0) {
1233
1234 /* allow this many simultaneous IO's to
1235 this RAID device */
1236 raidPtr->openings = RAIDOUTSTANDING;
1237
1238 raidinit(rs);
1239 rf_markalldirty(raidPtr);
1240 }
1241 /* free the buffers. No return code here. */
1242 if (k_cfg->layoutSpecificSize) {
1243 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1244 }
1245 RF_Free(k_cfg, sizeof(RF_Config_t));
1246
1247 return (retcode);
1248
1249 /* shutdown the system */
1250 case RAIDFRAME_SHUTDOWN:
1251
1252 part = DISKPART(dev);
1253 pmask = (1 << part);
1254
1255 if ((error = raidlock(rs)) != 0)
1256 return (error);
1257
1258 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1259 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1260 (rs->sc_dkdev.dk_copenmask & pmask)))
1261 retcode = EBUSY;
1262 else {
1263 rs->sc_flags |= RAIDF_SHUTDOWN;
1264 rs->sc_dkdev.dk_copenmask &= ~pmask;
1265 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1266 rs->sc_dkdev.dk_openmask &= ~pmask;
1267 retcode = 0;
1268 }
1269
1270 raidunlock(rs);
1271
1272 if (retcode != 0)
1273 return retcode;
1274
1275 /* free the pseudo device attach bits */
1276
1277 cf = device_cfdata(rs->sc_dev);
1278 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1279 free(cf, M_RAIDFRAME);
1280
1281 return (retcode);
1282 case RAIDFRAME_GET_COMPONENT_LABEL:
1283 clabel_ptr = (RF_ComponentLabel_t **) data;
1284 /* need to read the component label for the disk indicated
1285 by row,column in clabel */
1286
1287 /*
1288 * Perhaps there should be an option to skip the in-core
1289 * copy and hit the disk, as with disklabel(8).
1290 */
1291 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1292
1293 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1294
1295 if (retcode) {
1296 RF_Free(clabel, sizeof(*clabel));
1297 return retcode;
1298 }
1299
1300 clabel->row = 0; /* Don't allow looking at anything else.*/
1301
1302 column = clabel->column;
1303
1304 if ((column < 0) || (column >= raidPtr->numCol +
1305 raidPtr->numSpare)) {
1306 RF_Free(clabel, sizeof(*clabel));
1307 return EINVAL;
1308 }
1309
1310 RF_Free(clabel, sizeof(*clabel));
1311
1312 clabel = raidget_component_label(raidPtr, column);
1313
1314 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1315
1316 #if 0
1317 case RAIDFRAME_SET_COMPONENT_LABEL:
1318 clabel = (RF_ComponentLabel_t *) data;
1319
1320 /* XXX check the label for valid stuff... */
1321 /* Note that some things *should not* get modified --
1322 the user should be re-initing the labels instead of
1323 trying to patch things.
1324 */
1325
1326 raidid = raidPtr->raidid;
1327 #ifdef DEBUG
1328 printf("raid%d: Got component label:\n", raidid);
1329 printf("raid%d: Version: %d\n", raidid, clabel->version);
1330 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1331 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1332 printf("raid%d: Column: %d\n", raidid, clabel->column);
1333 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1334 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1335 printf("raid%d: Status: %d\n", raidid, clabel->status);
1336 #endif
1337 clabel->row = 0;
1338 column = clabel->column;
1339
1340 if ((column < 0) || (column >= raidPtr->numCol)) {
1341 return(EINVAL);
1342 }
1343
1344 /* XXX this isn't allowed to do anything for now :-) */
1345
1346 /* XXX and before it is, we need to fill in the rest
1347 of the fields!?!?!?! */
1348 memcpy(raidget_component_label(raidPtr, column),
1349 clabel, sizeof(*clabel));
1350 raidflush_component_label(raidPtr, column);
1351 return (0);
1352 #endif
1353
1354 case RAIDFRAME_INIT_LABELS:
1355 clabel = (RF_ComponentLabel_t *) data;
1356 /*
1357 we only want the serial number from
1358 the above. We get all the rest of the information
1359 from the config that was used to create this RAID
1360 set.
1361 */
1362
1363 raidPtr->serial_number = clabel->serial_number;
1364
1365 for(column=0;column<raidPtr->numCol;column++) {
1366 diskPtr = &raidPtr->Disks[column];
1367 if (!RF_DEAD_DISK(diskPtr->status)) {
1368 ci_label = raidget_component_label(raidPtr,
1369 column);
1370 /* Zeroing this is important. */
1371 memset(ci_label, 0, sizeof(*ci_label));
1372 raid_init_component_label(raidPtr, ci_label);
1373 ci_label->serial_number =
1374 raidPtr->serial_number;
1375 ci_label->row = 0; /* we dont' pretend to support more */
1376 rf_component_label_set_partitionsize(ci_label,
1377 diskPtr->partitionSize);
1378 ci_label->column = column;
1379 raidflush_component_label(raidPtr, column);
1380 }
1381 /* XXXjld what about the spares? */
1382 }
1383
1384 return (retcode);
1385 case RAIDFRAME_SET_AUTOCONFIG:
1386 d = rf_set_autoconfig(raidPtr, *(int *) data);
1387 printf("raid%d: New autoconfig value is: %d\n",
1388 raidPtr->raidid, d);
1389 *(int *) data = d;
1390 return (retcode);
1391
1392 case RAIDFRAME_SET_ROOT:
1393 d = rf_set_rootpartition(raidPtr, *(int *) data);
1394 printf("raid%d: New rootpartition value is: %d\n",
1395 raidPtr->raidid, d);
1396 *(int *) data = d;
1397 return (retcode);
1398
1399 /* initialize all parity */
1400 case RAIDFRAME_REWRITEPARITY:
1401
1402 if (raidPtr->Layout.map->faultsTolerated == 0) {
1403 /* Parity for RAID 0 is trivially correct */
1404 raidPtr->parity_good = RF_RAID_CLEAN;
1405 return(0);
1406 }
1407
1408 if (raidPtr->parity_rewrite_in_progress == 1) {
1409 /* Re-write is already in progress! */
1410 return(EINVAL);
1411 }
1412
1413 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1414 rf_RewriteParityThread,
1415 raidPtr,"raid_parity");
1416 return (retcode);
1417
1418
1419 case RAIDFRAME_ADD_HOT_SPARE:
1420 sparePtr = (RF_SingleComponent_t *) data;
1421 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1422 retcode = rf_add_hot_spare(raidPtr, &component);
1423 return(retcode);
1424
1425 case RAIDFRAME_REMOVE_HOT_SPARE:
1426 return(retcode);
1427
1428 case RAIDFRAME_DELETE_COMPONENT:
1429 componentPtr = (RF_SingleComponent_t *)data;
1430 memcpy( &component, componentPtr,
1431 sizeof(RF_SingleComponent_t));
1432 retcode = rf_delete_component(raidPtr, &component);
1433 return(retcode);
1434
1435 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1436 componentPtr = (RF_SingleComponent_t *)data;
1437 memcpy( &component, componentPtr,
1438 sizeof(RF_SingleComponent_t));
1439 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1440 return(retcode);
1441
1442 case RAIDFRAME_REBUILD_IN_PLACE:
1443
1444 if (raidPtr->Layout.map->faultsTolerated == 0) {
1445 /* Can't do this on a RAID 0!! */
1446 return(EINVAL);
1447 }
1448
1449 if (raidPtr->recon_in_progress == 1) {
1450 /* a reconstruct is already in progress! */
1451 return(EINVAL);
1452 }
1453
1454 componentPtr = (RF_SingleComponent_t *) data;
1455 memcpy( &component, componentPtr,
1456 sizeof(RF_SingleComponent_t));
1457 component.row = 0; /* we don't support any more */
1458 column = component.column;
1459
1460 if ((column < 0) || (column >= raidPtr->numCol)) {
1461 return(EINVAL);
1462 }
1463
1464 rf_lock_mutex2(raidPtr->mutex);
1465 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1466 (raidPtr->numFailures > 0)) {
1467 /* XXX 0 above shouldn't be constant!!! */
1468 /* some component other than this has failed.
1469 Let's not make things worse than they already
1470 are... */
1471 printf("raid%d: Unable to reconstruct to disk at:\n",
1472 raidPtr->raidid);
1473 printf("raid%d: Col: %d Too many failures.\n",
1474 raidPtr->raidid, column);
1475 rf_unlock_mutex2(raidPtr->mutex);
1476 return (EINVAL);
1477 }
1478 if (raidPtr->Disks[column].status ==
1479 rf_ds_reconstructing) {
1480 printf("raid%d: Unable to reconstruct to disk at:\n",
1481 raidPtr->raidid);
1482 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1483
1484 rf_unlock_mutex2(raidPtr->mutex);
1485 return (EINVAL);
1486 }
1487 if (raidPtr->Disks[column].status == rf_ds_spared) {
1488 rf_unlock_mutex2(raidPtr->mutex);
1489 return (EINVAL);
1490 }
1491 rf_unlock_mutex2(raidPtr->mutex);
1492
1493 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1494 if (rrcopy == NULL)
1495 return(ENOMEM);
1496
1497 rrcopy->raidPtr = (void *) raidPtr;
1498 rrcopy->col = column;
1499
1500 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1501 rf_ReconstructInPlaceThread,
1502 rrcopy,"raid_reconip");
1503 return(retcode);
1504
1505 case RAIDFRAME_GET_INFO:
1506 if (!raidPtr->valid)
1507 return (ENODEV);
1508 ucfgp = (RF_DeviceConfig_t **) data;
1509 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1510 (RF_DeviceConfig_t *));
1511 if (d_cfg == NULL)
1512 return (ENOMEM);
1513 d_cfg->rows = 1; /* there is only 1 row now */
1514 d_cfg->cols = raidPtr->numCol;
1515 d_cfg->ndevs = raidPtr->numCol;
1516 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1517 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1518 return (ENOMEM);
1519 }
1520 d_cfg->nspares = raidPtr->numSpare;
1521 if (d_cfg->nspares >= RF_MAX_DISKS) {
1522 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1523 return (ENOMEM);
1524 }
1525 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1526 d = 0;
1527 for (j = 0; j < d_cfg->cols; j++) {
1528 d_cfg->devs[d] = raidPtr->Disks[j];
1529 d++;
1530 }
1531 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1532 d_cfg->spares[i] = raidPtr->Disks[j];
1533 }
1534 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1535 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1536
1537 return (retcode);
1538
1539 case RAIDFRAME_CHECK_PARITY:
1540 *(int *) data = raidPtr->parity_good;
1541 return (0);
1542
1543 case RAIDFRAME_PARITYMAP_STATUS:
1544 if (rf_paritymap_ineligible(raidPtr))
1545 return EINVAL;
1546 rf_paritymap_status(raidPtr->parity_map,
1547 (struct rf_pmstat *)data);
1548 return 0;
1549
1550 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1551 if (rf_paritymap_ineligible(raidPtr))
1552 return EINVAL;
1553 if (raidPtr->parity_map == NULL)
1554 return ENOENT; /* ??? */
1555 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1556 (struct rf_pmparams *)data, 1))
1557 return EINVAL;
1558 return 0;
1559
1560 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1561 if (rf_paritymap_ineligible(raidPtr))
1562 return EINVAL;
1563 *(int *) data = rf_paritymap_get_disable(raidPtr);
1564 return 0;
1565
1566 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1567 if (rf_paritymap_ineligible(raidPtr))
1568 return EINVAL;
1569 rf_paritymap_set_disable(raidPtr, *(int *)data);
1570 /* XXX should errors be passed up? */
1571 return 0;
1572
1573 case RAIDFRAME_RESET_ACCTOTALS:
1574 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1575 return (0);
1576
1577 case RAIDFRAME_GET_ACCTOTALS:
1578 totals = (RF_AccTotals_t *) data;
1579 *totals = raidPtr->acc_totals;
1580 return (0);
1581
1582 case RAIDFRAME_KEEP_ACCTOTALS:
1583 raidPtr->keep_acc_totals = *(int *)data;
1584 return (0);
1585
1586 case RAIDFRAME_GET_SIZE:
1587 *(int *) data = raidPtr->totalSectors;
1588 return (0);
1589
1590 /* fail a disk & optionally start reconstruction */
1591 case RAIDFRAME_FAIL_DISK:
1592
1593 if (raidPtr->Layout.map->faultsTolerated == 0) {
1594 /* Can't do this on a RAID 0!! */
1595 return(EINVAL);
1596 }
1597
1598 rr = (struct rf_recon_req *) data;
1599 rr->row = 0;
1600 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1601 return (EINVAL);
1602
1603
1604 rf_lock_mutex2(raidPtr->mutex);
1605 if (raidPtr->status == rf_rs_reconstructing) {
1606 /* you can't fail a disk while we're reconstructing! */
1607 /* XXX wrong for RAID6 */
1608 rf_unlock_mutex2(raidPtr->mutex);
1609 return (EINVAL);
1610 }
1611 if ((raidPtr->Disks[rr->col].status ==
1612 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1613 /* some other component has failed. Let's not make
1614 things worse. XXX wrong for RAID6 */
1615 rf_unlock_mutex2(raidPtr->mutex);
1616 return (EINVAL);
1617 }
1618 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1619 /* Can't fail a spared disk! */
1620 rf_unlock_mutex2(raidPtr->mutex);
1621 return (EINVAL);
1622 }
1623 rf_unlock_mutex2(raidPtr->mutex);
1624
1625 /* make a copy of the recon request so that we don't rely on
1626 * the user's buffer */
1627 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1628 if (rrcopy == NULL)
1629 return(ENOMEM);
1630 memcpy(rrcopy, rr, sizeof(*rr));
1631 rrcopy->raidPtr = (void *) raidPtr;
1632
1633 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1634 rf_ReconThread,
1635 rrcopy,"raid_recon");
1636 return (0);
1637
1638 /* invoke a copyback operation after recon on whatever disk
1639 * needs it, if any */
1640 case RAIDFRAME_COPYBACK:
1641
1642 if (raidPtr->Layout.map->faultsTolerated == 0) {
1643 /* This makes no sense on a RAID 0!! */
1644 return(EINVAL);
1645 }
1646
1647 if (raidPtr->copyback_in_progress == 1) {
1648 /* Copyback is already in progress! */
1649 return(EINVAL);
1650 }
1651
1652 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1653 rf_CopybackThread,
1654 raidPtr,"raid_copyback");
1655 return (retcode);
1656
1657 /* return the percentage completion of reconstruction */
1658 case RAIDFRAME_CHECK_RECON_STATUS:
1659 if (raidPtr->Layout.map->faultsTolerated == 0) {
1660 /* This makes no sense on a RAID 0, so tell the
1661 user it's done. */
1662 *(int *) data = 100;
1663 return(0);
1664 }
1665 if (raidPtr->status != rf_rs_reconstructing)
1666 *(int *) data = 100;
1667 else {
1668 if (raidPtr->reconControl->numRUsTotal > 0) {
1669 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1670 } else {
1671 *(int *) data = 0;
1672 }
1673 }
1674 return (0);
1675 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1676 progressInfoPtr = (RF_ProgressInfo_t **) data;
1677 if (raidPtr->status != rf_rs_reconstructing) {
1678 progressInfo.remaining = 0;
1679 progressInfo.completed = 100;
1680 progressInfo.total = 100;
1681 } else {
1682 progressInfo.total =
1683 raidPtr->reconControl->numRUsTotal;
1684 progressInfo.completed =
1685 raidPtr->reconControl->numRUsComplete;
1686 progressInfo.remaining = progressInfo.total -
1687 progressInfo.completed;
1688 }
1689 retcode = copyout(&progressInfo, *progressInfoPtr,
1690 sizeof(RF_ProgressInfo_t));
1691 return (retcode);
1692
1693 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1694 if (raidPtr->Layout.map->faultsTolerated == 0) {
1695 /* This makes no sense on a RAID 0, so tell the
1696 user it's done. */
1697 *(int *) data = 100;
1698 return(0);
1699 }
1700 if (raidPtr->parity_rewrite_in_progress == 1) {
1701 *(int *) data = 100 *
1702 raidPtr->parity_rewrite_stripes_done /
1703 raidPtr->Layout.numStripe;
1704 } else {
1705 *(int *) data = 100;
1706 }
1707 return (0);
1708
1709 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1710 progressInfoPtr = (RF_ProgressInfo_t **) data;
1711 if (raidPtr->parity_rewrite_in_progress == 1) {
1712 progressInfo.total = raidPtr->Layout.numStripe;
1713 progressInfo.completed =
1714 raidPtr->parity_rewrite_stripes_done;
1715 progressInfo.remaining = progressInfo.total -
1716 progressInfo.completed;
1717 } else {
1718 progressInfo.remaining = 0;
1719 progressInfo.completed = 100;
1720 progressInfo.total = 100;
1721 }
1722 retcode = copyout(&progressInfo, *progressInfoPtr,
1723 sizeof(RF_ProgressInfo_t));
1724 return (retcode);
1725
1726 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1727 if (raidPtr->Layout.map->faultsTolerated == 0) {
1728 /* This makes no sense on a RAID 0 */
1729 *(int *) data = 100;
1730 return(0);
1731 }
1732 if (raidPtr->copyback_in_progress == 1) {
1733 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1734 raidPtr->Layout.numStripe;
1735 } else {
1736 *(int *) data = 100;
1737 }
1738 return (0);
1739
1740 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1741 progressInfoPtr = (RF_ProgressInfo_t **) data;
1742 if (raidPtr->copyback_in_progress == 1) {
1743 progressInfo.total = raidPtr->Layout.numStripe;
1744 progressInfo.completed =
1745 raidPtr->copyback_stripes_done;
1746 progressInfo.remaining = progressInfo.total -
1747 progressInfo.completed;
1748 } else {
1749 progressInfo.remaining = 0;
1750 progressInfo.completed = 100;
1751 progressInfo.total = 100;
1752 }
1753 retcode = copyout(&progressInfo, *progressInfoPtr,
1754 sizeof(RF_ProgressInfo_t));
1755 return (retcode);
1756
1757 /* the sparetable daemon calls this to wait for the kernel to
1758 * need a spare table. this ioctl does not return until a
1759 * spare table is needed. XXX -- calling mpsleep here in the
1760 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1761 * -- I should either compute the spare table in the kernel,
1762 * or have a different -- XXX XXX -- interface (a different
1763 * character device) for delivering the table -- XXX */
1764 #if 0
1765 case RAIDFRAME_SPARET_WAIT:
1766 rf_lock_mutex2(rf_sparet_wait_mutex);
1767 while (!rf_sparet_wait_queue)
1768 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1769 waitreq = rf_sparet_wait_queue;
1770 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1771 rf_unlock_mutex2(rf_sparet_wait_mutex);
1772
1773 /* structure assignment */
1774 *((RF_SparetWait_t *) data) = *waitreq;
1775
1776 RF_Free(waitreq, sizeof(*waitreq));
1777 return (0);
1778
1779 /* wakes up a process waiting on SPARET_WAIT and puts an error
1780 * code in it that will cause the dameon to exit */
1781 case RAIDFRAME_ABORT_SPARET_WAIT:
1782 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1783 waitreq->fcol = -1;
1784 rf_lock_mutex2(rf_sparet_wait_mutex);
1785 waitreq->next = rf_sparet_wait_queue;
1786 rf_sparet_wait_queue = waitreq;
1787 rf_broadcast_conf2(rf_sparet_wait_cv);
1788 rf_unlock_mutex2(rf_sparet_wait_mutex);
1789 return (0);
1790
1791 /* used by the spare table daemon to deliver a spare table
1792 * into the kernel */
1793 case RAIDFRAME_SEND_SPARET:
1794
1795 /* install the spare table */
1796 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1797
1798 /* respond to the requestor. the return status of the spare
1799 * table installation is passed in the "fcol" field */
1800 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1801 waitreq->fcol = retcode;
1802 rf_lock_mutex2(rf_sparet_wait_mutex);
1803 waitreq->next = rf_sparet_resp_queue;
1804 rf_sparet_resp_queue = waitreq;
1805 rf_broadcast_cond2(rf_sparet_resp_cv);
1806 rf_unlock_mutex2(rf_sparet_wait_mutex);
1807
1808 return (retcode);
1809 #endif
1810
1811 default:
1812 break; /* fall through to the os-specific code below */
1813
1814 }
1815
1816 if (!raidPtr->valid)
1817 return (EINVAL);
1818
1819 /*
1820 * Add support for "regular" device ioctls here.
1821 */
1822
1823 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1824 if (error != EPASSTHROUGH)
1825 return (error);
1826
1827 switch (cmd) {
1828 case DIOCGDINFO:
1829 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1830 break;
1831 #ifdef __HAVE_OLD_DISKLABEL
1832 case ODIOCGDINFO:
1833 newlabel = *(rs->sc_dkdev.dk_label);
1834 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1835 return ENOTTY;
1836 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1837 break;
1838 #endif
1839
1840 case DIOCGPART:
1841 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1842 ((struct partinfo *) data)->part =
1843 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1844 break;
1845
1846 case DIOCWDINFO:
1847 case DIOCSDINFO:
1848 #ifdef __HAVE_OLD_DISKLABEL
1849 case ODIOCWDINFO:
1850 case ODIOCSDINFO:
1851 #endif
1852 {
1853 struct disklabel *lp;
1854 #ifdef __HAVE_OLD_DISKLABEL
1855 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1856 memset(&newlabel, 0, sizeof newlabel);
1857 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1858 lp = &newlabel;
1859 } else
1860 #endif
1861 lp = (struct disklabel *)data;
1862
1863 if ((error = raidlock(rs)) != 0)
1864 return (error);
1865
1866 rs->sc_flags |= RAIDF_LABELLING;
1867
1868 error = setdisklabel(rs->sc_dkdev.dk_label,
1869 lp, 0, rs->sc_dkdev.dk_cpulabel);
1870 if (error == 0) {
1871 if (cmd == DIOCWDINFO
1872 #ifdef __HAVE_OLD_DISKLABEL
1873 || cmd == ODIOCWDINFO
1874 #endif
1875 )
1876 error = writedisklabel(RAIDLABELDEV(dev),
1877 raidstrategy, rs->sc_dkdev.dk_label,
1878 rs->sc_dkdev.dk_cpulabel);
1879 }
1880 rs->sc_flags &= ~RAIDF_LABELLING;
1881
1882 raidunlock(rs);
1883
1884 if (error)
1885 return (error);
1886 break;
1887 }
1888
1889 case DIOCWLABEL:
1890 if (*(int *) data != 0)
1891 rs->sc_flags |= RAIDF_WLABEL;
1892 else
1893 rs->sc_flags &= ~RAIDF_WLABEL;
1894 break;
1895
1896 case DIOCGDEFLABEL:
1897 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1898 break;
1899
1900 #ifdef __HAVE_OLD_DISKLABEL
1901 case ODIOCGDEFLABEL:
1902 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1903 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1904 return ENOTTY;
1905 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1906 break;
1907 #endif
1908
1909 case DIOCAWEDGE:
1910 case DIOCDWEDGE:
1911 dkw = (void *)data;
1912
1913 /* If the ioctl happens here, the parent is us. */
1914 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1915 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1916
1917 case DIOCLWEDGES:
1918 return dkwedge_list(&rs->sc_dkdev,
1919 (struct dkwedge_list *)data, l);
1920 case DIOCCACHESYNC:
1921 return rf_sync_component_caches(raidPtr);
1922
1923 case DIOCGSTRATEGY:
1924 {
1925 struct disk_strategy *dks = (void *)data;
1926
1927 s = splbio();
1928 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1929 sizeof(dks->dks_name));
1930 splx(s);
1931 dks->dks_paramlen = 0;
1932
1933 return 0;
1934 }
1935
1936 case DIOCSSTRATEGY:
1937 {
1938 struct disk_strategy *dks = (void *)data;
1939 struct bufq_state *new;
1940 struct bufq_state *old;
1941
1942 if (dks->dks_param != NULL) {
1943 return EINVAL;
1944 }
1945 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1946 error = bufq_alloc(&new, dks->dks_name,
1947 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1948 if (error) {
1949 return error;
1950 }
1951 s = splbio();
1952 old = rs->buf_queue;
1953 bufq_move(new, old);
1954 rs->buf_queue = new;
1955 splx(s);
1956 bufq_free(old);
1957
1958 return 0;
1959 }
1960
1961 default:
1962 retcode = ENOTTY;
1963 }
1964 return (retcode);
1965
1966 }
1967
1968
1969 /* raidinit -- complete the rest of the initialization for the
1970 RAIDframe device. */
1971
1972
/*
 * raidinit: complete the initialization of a freshly configured RAID
 * set: attach the pseudo-device, set up the disk(9) structures, and
 * kick off wedge discovery.  On config_attach_pseudo() failure the
 * RAIDF_INITED flag is cleared again and the function returns without
 * attaching disk structures.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Scan the new device for wedges (e.g. GPT partitions). */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
2024 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2025 /* wake up the daemon & tell it to get us a spare table
2026 * XXX
2027 * the entries in the queues should be tagged with the raidPtr
2028 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2030 * XXX
2031 *
2032 * XXX This code is not currently used. GO
2033 */
/*
 * rf_GetSpareTableFromDaemon: queue a spare-table request for the
 * user-level sparetable daemon and block until a response arrives.
 *
 * req is pushed onto rf_sparet_wait_queue and the waiters are woken;
 * the reply is popped from rf_sparet_resp_queue (note: a different
 * RF_SparetWait_t object than the one passed in).  Returns the reply's
 * "fcol" status value.  Both queues are protected by
 * rf_sparet_wait_mutex.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2() drops the mutex while we sleep and re-takes it
	 * on wakeup (this used to be mpsleep) */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2057 #endif
2058
2059 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2060 * bp & passes it down.
2061 * any calls originating in the kernel must use non-blocking I/O
2062 * do some extra sanity checking to return "appropriate" error values for
2063 * certain conditions (to make some standard utilities work)
2064 *
2065 * Formerly known as: rf_DoAccessKernel
2066 */
/*
 * raidstart: drain the softc's buffer queue, handing each buf to
 * rf_DoAccess() as non-blocking I/O, for as long as raidPtr->openings
 * permits.  NOTE(review): raidPtr->mutex is taken at the top, so this
 * presumably must be entered with that mutex NOT held -- confirm
 * against callers.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex across the label update, then re-take it
		 * and consume one pending-failure notification */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* loop invariant: mutex held at the top of each iteration */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb: one extra sector if the byte count isn't an exact
		 * multiple of the sector size */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the extra
		 * "sum < ..." comparisons catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* requests must be sector-aligned in size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2184
2185
2186
2187
2188 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2189
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map the RAIDframe request type onto a buf-layer direction flag. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately; KernelWakeupFunc queues the
		 * request on the iodone list and signals the raidio thread. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp to describe this transfer against the underlying
		 * component device; KernelWakeupFunc runs at biodone time. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2263 /* this is the callback function associated with a I/O invoked from
2264 kernel code.
2265 */
static void
KernelWakeupFunc(struct buf *bp)
{
	/*
	 * biodone callback for component I/O dispatched by
	 * rf_DispatchKernelIO(): record the result, possibly fail the
	 * component, and hand the request to the raidio thread.
	 */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* i.e. the component must currently be usable (optimal or a
		 * spare in use), and failing it must stay within what the
		 * layout's fault tolerance can absorb. */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2331
2332
2333 /*
2334 * initialize a buf structure for doing an I/O in the kernel.
2335 */
2336 static void
2337 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2338 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2339 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2340 struct proc *b_proc)
2341 {
2342 /* bp->b_flags = B_PHYS | rw_flag; */
2343 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2344 bp->b_oflags = 0;
2345 bp->b_cflags = 0;
2346 bp->b_bcount = numSect << logBytesPerSector;
2347 bp->b_bufsize = bp->b_bcount;
2348 bp->b_error = 0;
2349 bp->b_dev = dev;
2350 bp->b_data = bf;
2351 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2352 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2353 if (bp->b_bcount == 0) {
2354 panic("bp->b_bcount is zero in InitBP!!");
2355 }
2356 bp->b_proc = b_proc;
2357 bp->b_iodone = cbFunc;
2358 bp->b_private = cbArg;
2359 }
2360
2361 static void
2362 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2363 struct disklabel *lp)
2364 {
2365 memset(lp, 0, sizeof(*lp));
2366
2367 /* fabricate a label... */
2368 lp->d_secperunit = raidPtr->totalSectors;
2369 lp->d_secsize = raidPtr->bytesPerSector;
2370 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2371 lp->d_ntracks = 4 * raidPtr->numCol;
2372 lp->d_ncylinders = raidPtr->totalSectors /
2373 (lp->d_nsectors * lp->d_ntracks);
2374 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2375
2376 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2377 lp->d_type = DTYPE_RAID;
2378 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2379 lp->d_rpm = 3600;
2380 lp->d_interleave = 1;
2381 lp->d_flags = 0;
2382
2383 lp->d_partitions[RAW_PART].p_offset = 0;
2384 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2385 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2386 lp->d_npartitions = RAW_PART + 1;
2387
2388 lp->d_magic = DISKMAGIC;
2389 lp->d_magic2 = DISKMAGIC;
2390 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2391
2392 }
2393 /*
2394 * Read the disklabel from the raid device. If one is not present, fake one
2395 * up.
2396 */
static void
raidgetdisklabel(dev_t dev)
{
	/*
	 * Read the disklabel for raid unit `dev' into the softc's dkdev.
	 * Start from a fabricated default, then let readdisklabel()
	 * overwrite it with an on-disk label if one exists; fall back to
	 * raidmakedisklabel() otherwise.
	 */
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* warn about any partition that extends past the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2455 /*
2456 * Take care of things one might want to take care of in the event
2457 * that a disklabel isn't present.
2458 */
2459 static void
2460 raidmakedisklabel(struct raid_softc *rs)
2461 {
2462 struct disklabel *lp = rs->sc_dkdev.dk_label;
2463 db1_printf(("Making a label..\n"));
2464
2465 /*
2466 * For historical reasons, if there's no disklabel present
2467 * the raw partition must be marked FS_BSDFFS.
2468 */
2469
2470 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2471
2472 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2473
2474 lp->d_checksum = dkcksum(lp);
2475 }
2476 /*
2477 * Wait interruptibly for an exclusive lock.
2478 *
2479 * XXX
2480 * Several drivers do this; it should be abstracted and made MP-safe.
2481 * (Hmm... where have we seen this warning before :-> GO )
2482 */
2483 static int
2484 raidlock(struct raid_softc *rs)
2485 {
2486 int error;
2487
2488 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2489 rs->sc_flags |= RAIDF_WANTED;
2490 if ((error =
2491 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2492 return (error);
2493 }
2494 rs->sc_flags |= RAIDF_LOCKED;
2495 return (0);
2496 }
2497 /*
2498 * Unlock and wake up any waiters.
2499 */
2500 static void
2501 raidunlock(struct raid_softc *rs)
2502 {
2503
2504 rs->sc_flags &= ~RAIDF_LOCKED;
2505 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2506 rs->sc_flags &= ~RAIDF_WANTED;
2507 wakeup(rs);
2508 }
2509 }
2510
2511
2512 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2513 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2514 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2515
static daddr_t
rf_component_info_offset(void)
{
	/* Byte offset of the component label area on each component. */
	return RF_COMPONENT_INFO_OFFSET;
}
2522
2523 static daddr_t
2524 rf_component_info_size(unsigned secsize)
2525 {
2526 daddr_t info_size;
2527
2528 KASSERT(secsize);
2529 if (secsize > RF_COMPONENT_INFO_SIZE)
2530 info_size = secsize;
2531 else
2532 info_size = RF_COMPONENT_INFO_SIZE;
2533
2534 return info_size;
2535 }
2536
2537 static daddr_t
2538 rf_parity_map_offset(RF_Raid_t *raidPtr)
2539 {
2540 daddr_t map_offset;
2541
2542 KASSERT(raidPtr->bytesPerSector);
2543 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2544 map_offset = raidPtr->bytesPerSector;
2545 else
2546 map_offset = RF_COMPONENT_INFO_SIZE;
2547 map_offset += rf_component_info_offset();
2548
2549 return map_offset;
2550 }
2551
2552 static daddr_t
2553 rf_parity_map_size(RF_Raid_t *raidPtr)
2554 {
2555 daddr_t map_size;
2556
2557 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2558 map_size = raidPtr->bytesPerSector;
2559 else
2560 map_size = RF_PARITY_MAP_SIZE;
2561
2562 return map_size;
2563 }
2564
2565 int
2566 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2567 {
2568 RF_ComponentLabel_t *clabel;
2569
2570 clabel = raidget_component_label(raidPtr, col);
2571 clabel->clean = RF_RAID_CLEAN;
2572 raidflush_component_label(raidPtr, col);
2573 return(0);
2574 }
2575
2576
2577 int
2578 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2579 {
2580 RF_ComponentLabel_t *clabel;
2581
2582 clabel = raidget_component_label(raidPtr, col);
2583 clabel->clean = RF_RAID_DIRTY;
2584 raidflush_component_label(raidPtr, col);
2585 return(0);
2586 }
2587
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/*
	 * Read component `col's on-disk label into the in-core copy
	 * (raid_cinfo[col].ci_label).  Returns the read routine's error.
	 */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2597
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return the in-core component label for column `col'. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2603
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/*
	 * Write the in-core label for column `col' out to the component,
	 * stamping it with the array's current mod_counter first.
	 */
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's mod counter in lockstep with the label's */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2618
2619
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read a component label from the fixed component-info area. */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2629
2630 /* ARGSUSED */
2631 static int
2632 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2633 size_t msize, daddr_t offset, daddr_t dsize)
2634 {
2635 struct buf *bp;
2636 const struct bdevsw *bdev;
2637 int error;
2638
2639 /* XXX should probably ensure that we don't try to do this if
2640 someone has changed rf_protected_sectors. */
2641
2642 if (b_vp == NULL) {
2643 /* For whatever reason, this component is not valid.
2644 Don't try to read a component label from it. */
2645 return(EINVAL);
2646 }
2647
2648 /* get a block of the appropriate size... */
2649 bp = geteblk((int)dsize);
2650 bp->b_dev = dev;
2651
2652 /* get our ducks in a row for the read */
2653 bp->b_blkno = offset / DEV_BSIZE;
2654 bp->b_bcount = dsize;
2655 bp->b_flags |= B_READ;
2656 bp->b_resid = dsize;
2657
2658 bdev = bdevsw_lookup(bp->b_dev);
2659 if (bdev == NULL)
2660 return (ENXIO);
2661 (*bdev->d_strategy)(bp);
2662
2663 error = biowait(bp);
2664
2665 if (!error) {
2666 memcpy(data, bp->b_data, msize);
2667 }
2668
2669 brelse(bp, 0);
2670 return(error);
2671 }
2672
2673
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Write a component label to the fixed component-info area
	 * (synchronously: asyncp == 0). */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2683
2684 /* ARGSUSED */
2685 static int
2686 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2687 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2688 {
2689 struct buf *bp;
2690 const struct bdevsw *bdev;
2691 int error;
2692
2693 /* get a block of the appropriate size... */
2694 bp = geteblk((int)dsize);
2695 bp->b_dev = dev;
2696
2697 /* get our ducks in a row for the write */
2698 bp->b_blkno = offset / DEV_BSIZE;
2699 bp->b_bcount = dsize;
2700 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2701 bp->b_resid = dsize;
2702
2703 memset(bp->b_data, 0, dsize);
2704 memcpy(bp->b_data, data, msize);
2705
2706 bdev = bdevsw_lookup(bp->b_dev);
2707 if (bdev == NULL)
2708 return (ENXIO);
2709 (*bdev->d_strategy)(bp);
2710 if (asyncp)
2711 return 0;
2712 error = biowait(bp);
2713 brelse(bp, 0);
2714 if (error) {
2715 #if 1
2716 printf("Failed to write RAID component info!\n");
2717 #endif
2718 }
2719
2720 return(error);
2721 }
2722
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	/* Write the on-disk parity map to every live component. */
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2740
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	/*
	 * Read the parity map from every live component, merging the
	 * copies (bitwise, via rf_paritymap_merge) into *map.
	 * NOTE(review): if every disk is dead, *map is left untouched --
	 * callers presumably never reach here in that state; verify.
	 */
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* first live copy seeds the result */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2765
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/*
	 * Bump the mod counter and mark every usable component (and every
	 * in-use spare) dirty, writing the labels out.  Called when the
	 * array goes into service.
	 */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2825
2826
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/*
	 * Refresh and flush the component labels for all optimal
	 * components and in-use spares.  When `final' is
	 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark
	 * them clean (shutdown path).
	 */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2901
2902 void
2903 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2904 {
2905
2906 if (vp != NULL) {
2907 if (auto_configured == 1) {
2908 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2909 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2910 vput(vp);
2911
2912 } else {
2913 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2914 }
2915 }
2916 }
2917
2918
2919 void
2920 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2921 {
2922 int r,c;
2923 struct vnode *vp;
2924 int acd;
2925
2926
2927 /* We take this opportunity to close the vnodes like we should.. */
2928
2929 for (c = 0; c < raidPtr->numCol; c++) {
2930 vp = raidPtr->raid_cinfo[c].ci_vp;
2931 acd = raidPtr->Disks[c].auto_configured;
2932 rf_close_component(raidPtr, vp, acd);
2933 raidPtr->raid_cinfo[c].ci_vp = NULL;
2934 raidPtr->Disks[c].auto_configured = 0;
2935 }
2936
2937 for (r = 0; r < raidPtr->numSpare; r++) {
2938 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2939 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2940 rf_close_component(raidPtr, vp, acd);
2941 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2942 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2943 }
2944 }
2945
2946
void
rf_ReconThread(struct rf_recon_req *req)
{
	/*
	 * Kernel-thread body: fail the requested component and (if
	 * RF_FDFLAGS_RECON is set) reconstruct it onto a spare.  Frees
	 * `req' and exits the thread when done.
	 */
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2968
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	/*
	 * Kernel-thread body: rewrite all parity on the array, mark the
	 * array clean on success, wake any shutdown waiter, and exit.
	 */
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2999
3000
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	/*
	 * Kernel-thread body: copy reconstructed data from spares back
	 * onto replaced components, then exit.
	 */
	int     s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3015
3016
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	/*
	 * Kernel-thread body: reconstruct the named column in place
	 * (onto the same device).  Frees `req' and exits when done.
	 */
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3034
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	/*
	 * Probe one candidate device/partition for a RAIDframe component
	 * label.  If a plausible label is found, prepend a new
	 * RF_AutoConfig_t (which takes ownership of `vp' and the label)
	 * to `ac_list'; otherwise free the label and close/release `vp'.
	 * Returns the (possibly extended) list, or NULL if memory ran out
	 * (in which case the entire list is torn down).
	 */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: unwind and free everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so release everything we hold */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3092
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	/*
	 * Scan every disk-class device in the system for RAIDframe
	 * component labels: wedges of type RAIDframe, FS_RAID disklabel
	 * partitions, and (failing those) the raw partition.  Returns a
	 * linked list of candidate components for auto-configuration.
	 */
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges (dk): accept only RAIDframe-typed wedges */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3274
3275
3276 int
3277 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3278 {
3279
3280 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3281 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3282 ((clabel->clean == RF_RAID_CLEAN) ||
3283 (clabel->clean == RF_RAID_DIRTY)) &&
3284 clabel->row >=0 &&
3285 clabel->column >= 0 &&
3286 clabel->num_rows > 0 &&
3287 clabel->num_columns > 0 &&
3288 clabel->row < clabel->num_rows &&
3289 clabel->column < clabel->num_columns &&
3290 clabel->blockSize > 0 &&
3291 /*
3292 * numBlocksHi may contain garbage, but it is ok since
3293 * the type is unsigned. If it is really garbage,
3294 * rf_fix_old_label_size() will fix it.
3295 */
3296 rf_component_label_numblocks(clabel) > 0) {
3297 /*
3298 * label looks reasonable enough...
3299 * let's make sure it has no old garbage.
3300 */
3301 if (numsecs)
3302 rf_fix_old_label_size(clabel, numsecs);
3303 return(1);
3304 }
3305 return(0);
3306 }
3307
3308
3309 /*
3310 * For reasons yet unknown, some old component labels have garbage in
3311 * the newer numBlocksHi region, and this causes lossage. Since those
3312 * disks will also have numsecs set to less than 32 bits of sectors,
3313 * we can determine when this corruption has occurred, and fix it.
3314 *
3315 * The exact same problem, with the same unknown reason, happens to
3316 * the partitionSizeHi member as well.
3317 */
3318 static void
3319 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3320 {
3321
3322 if (numsecs < ((uint64_t)1 << 32)) {
3323 if (clabel->numBlocksHi) {
3324 printf("WARNING: total sectors < 32 bits, yet "
3325 "numBlocksHi set\n"
3326 "WARNING: resetting numBlocksHi to zero.\n");
3327 clabel->numBlocksHi = 0;
3328 }
3329
3330 if (clabel->partitionSizeHi) {
3331 printf("WARNING: total sectors < 32 bits, yet "
3332 "partitionSizeHi set\n"
3333 "WARNING: resetting partitionSizeHi to zero.\n");
3334 clabel->partitionSizeHi = 0;
3335 }
3336 }
3337 }
3338
3339
#ifdef DEBUG
/* Dump a component label to the console in human-readable form. */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};
	uint64_t nblocks;

	nblocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, nblocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3373
3374 RF_ConfigSet_t *
3375 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3376 {
3377 RF_AutoConfig_t *ac;
3378 RF_ConfigSet_t *config_sets;
3379 RF_ConfigSet_t *cset;
3380 RF_AutoConfig_t *ac_next;
3381
3382
3383 config_sets = NULL;
3384
3385 /* Go through the AutoConfig list, and figure out which components
3386 belong to what sets. */
3387 ac = ac_list;
3388 while(ac!=NULL) {
3389 /* we're going to putz with ac->next, so save it here
3390 for use at the end of the loop */
3391 ac_next = ac->next;
3392
3393 if (config_sets == NULL) {
3394 /* will need at least this one... */
3395 config_sets = (RF_ConfigSet_t *)
3396 malloc(sizeof(RF_ConfigSet_t),
3397 M_RAIDFRAME, M_NOWAIT);
3398 if (config_sets == NULL) {
3399 panic("rf_create_auto_sets: No memory!");
3400 }
3401 /* this one is easy :) */
3402 config_sets->ac = ac;
3403 config_sets->next = NULL;
3404 config_sets->rootable = 0;
3405 ac->next = NULL;
3406 } else {
3407 /* which set does this component fit into? */
3408 cset = config_sets;
3409 while(cset!=NULL) {
3410 if (rf_does_it_fit(cset, ac)) {
3411 /* looks like it matches... */
3412 ac->next = cset->ac;
3413 cset->ac = ac;
3414 break;
3415 }
3416 cset = cset->next;
3417 }
3418 if (cset==NULL) {
3419 /* didn't find a match above... new set..*/
3420 cset = (RF_ConfigSet_t *)
3421 malloc(sizeof(RF_ConfigSet_t),
3422 M_RAIDFRAME, M_NOWAIT);
3423 if (cset == NULL) {
3424 panic("rf_create_auto_sets: No memory!");
3425 }
3426 cset->ac = ac;
3427 ac->next = NULL;
3428 cset->next = config_sets;
3429 cset->rootable = 0;
3430 config_sets = cset;
3431 }
3432 }
3433 ac = ac_next;
3434 }
3435
3436
3437 return(config_sets);
3438 }
3439
3440 static int
3441 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3442 {
3443 RF_ComponentLabel_t *clabel1, *clabel2;
3444
3445 /* If this one matches the *first* one in the set, that's good
3446 enough, since the other members of the set would have been
3447 through here too... */
3448 /* note that we are not checking partitionSize here..
3449
3450 Note that we are also not checking the mod_counters here.
3451 If everything else matches except the mod_counter, that's
3452 good enough for this test. We will deal with the mod_counters
3453 a little later in the autoconfiguration process.
3454
3455 (clabel1->mod_counter == clabel2->mod_counter) &&
3456
3457 The reason we don't check for this is that failed disks
3458 will have lower modification counts. If those disks are
3459 not added to the set they used to belong to, then they will
3460 form their own set, which may result in 2 different sets,
3461 for example, competing to be configured at raid0, and
3462 perhaps competing to be the root filesystem set. If the
3463 wrong ones get configured, or both attempt to become /,
3464 weird behaviour and or serious lossage will occur. Thus we
3465 need to bring them into the fold here, and kick them out at
3466 a later point.
3467
3468 */
3469
3470 clabel1 = cset->ac->clabel;
3471 clabel2 = ac->clabel;
3472 if ((clabel1->version == clabel2->version) &&
3473 (clabel1->serial_number == clabel2->serial_number) &&
3474 (clabel1->num_rows == clabel2->num_rows) &&
3475 (clabel1->num_columns == clabel2->num_columns) &&
3476 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3477 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3478 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3479 (clabel1->parityConfig == clabel2->parityConfig) &&
3480 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3481 (clabel1->blockSize == clabel2->blockSize) &&
3482 rf_component_label_numblocks(clabel1) ==
3483 rf_component_label_numblocks(clabel2) &&
3484 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3485 (clabel1->root_partition == clabel2->root_partition) &&
3486 (clabel1->last_unit == clabel2->last_unit) &&
3487 (clabel1->config_order == clabel2->config_order)) {
3488 /* if it get's here, it almost *has* to be a match */
3489 } else {
3490 /* it's not consistent with somebody in the set..
3491 punt */
3492 return(0);
3493 }
3494 /* all was fine.. it must fit... */
3495 return(1);
3496 }
3497
/*
 * Decide whether the config set has enough live components to be
 * configured.  Returns 1 if configuration may proceed, 0 otherwise.
 * "Live" means a component whose mod_counter matches the highest
 * mod_counter seen in the set; stale (failed) components carry lower
 * counters and are not counted.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	/* The correct mod_counter is the maximum over all members. */
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a current (matching mod_counter)
	   component occupying it. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror
			   pair without bailing.. reset the
			   even_pair_failed flag, and go on to the
			   next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3600
3601 void
3602 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3603 RF_Raid_t *raidPtr)
3604 {
3605 RF_ComponentLabel_t *clabel;
3606 int i;
3607
3608 clabel = ac->clabel;
3609
3610 /* 1. Fill in the common stuff */
3611 config->numRow = clabel->num_rows = 1;
3612 config->numCol = clabel->num_columns;
3613 config->numSpare = 0; /* XXX should this be set here? */
3614 config->sectPerSU = clabel->sectPerSU;
3615 config->SUsPerPU = clabel->SUsPerPU;
3616 config->SUsPerRU = clabel->SUsPerRU;
3617 config->parityConfig = clabel->parityConfig;
3618 /* XXX... */
3619 strcpy(config->diskQueueType,"fifo");
3620 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3621 config->layoutSpecificSize = 0; /* XXX ?? */
3622
3623 while(ac!=NULL) {
3624 /* row/col values will be in range due to the checks
3625 in reasonable_label() */
3626 strcpy(config->devnames[0][ac->clabel->column],
3627 ac->devname);
3628 ac = ac->next;
3629 }
3630
3631 for(i=0;i<RF_MAXDBGV;i++) {
3632 config->debugVars[i][0] = 0;
3633 }
3634 }
3635
3636 int
3637 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3638 {
3639 RF_ComponentLabel_t *clabel;
3640 int column;
3641 int sparecol;
3642
3643 raidPtr->autoconfigure = new_value;
3644
3645 for(column=0; column<raidPtr->numCol; column++) {
3646 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3647 clabel = raidget_component_label(raidPtr, column);
3648 clabel->autoconfigure = new_value;
3649 raidflush_component_label(raidPtr, column);
3650 }
3651 }
3652 for(column = 0; column < raidPtr->numSpare ; column++) {
3653 sparecol = raidPtr->numCol + column;
3654 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3655 clabel = raidget_component_label(raidPtr, sparecol);
3656 clabel->autoconfigure = new_value;
3657 raidflush_component_label(raidPtr, sparecol);
3658 }
3659 }
3660 return(new_value);
3661 }
3662
3663 int
3664 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3665 {
3666 RF_ComponentLabel_t *clabel;
3667 int column;
3668 int sparecol;
3669
3670 raidPtr->root_partition = new_value;
3671 for(column=0; column<raidPtr->numCol; column++) {
3672 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3673 clabel = raidget_component_label(raidPtr, column);
3674 clabel->root_partition = new_value;
3675 raidflush_component_label(raidPtr, column);
3676 }
3677 }
3678 for(column = 0; column < raidPtr->numSpare ; column++) {
3679 sparecol = raidPtr->numCol + column;
3680 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3681 clabel = raidget_component_label(raidPtr, sparecol);
3682 clabel->root_partition = new_value;
3683 raidflush_component_label(raidPtr, sparecol);
3684 }
3685 }
3686 return(new_value);
3687 }
3688
3689 void
3690 rf_release_all_vps(RF_ConfigSet_t *cset)
3691 {
3692 RF_AutoConfig_t *ac;
3693
3694 ac = cset->ac;
3695 while(ac!=NULL) {
3696 /* Close the vp, and give it back */
3697 if (ac->vp) {
3698 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3699 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3700 vput(ac->vp);
3701 ac->vp = NULL;
3702 }
3703 ac = ac->next;
3704 }
3705 }
3706
3707
3708 void
3709 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3710 {
3711 RF_AutoConfig_t *ac;
3712 RF_AutoConfig_t *next_ac;
3713
3714 ac = cset->ac;
3715 while(ac!=NULL) {
3716 next_ac = ac->next;
3717 /* nuke the label */
3718 free(ac->clabel, M_RAIDFRAME);
3719 /* cleanup the config structure */
3720 free(ac, M_RAIDFRAME);
3721 /* "next.." */
3722 ac = next_ac;
3723 }
3724 /* and, finally, nuke the config set */
3725 free(cset, M_RAIDFRAME);
3726 }
3727
3728
/*
 * Initialize a component label from the current state of the RAID set:
 * identity (serial number, mod counter), geometry, layout parameters,
 * and the autoconfig/root settings.  The caller is responsible for
 * filling in per-component fields (row/column) and writing it out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3761
/*
 * Configure a RAID set from an autoconfig set: build an RF_Config_t
 * from the component labels, find a free unit (preferring the unit the
 * set was last configured on), and run rf_Configure().  On success the
 * softc is returned and the set's root eligibility is recorded; on
 * failure the softc is released and NULL is returned.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* Walk upward from the preferred unit until we find one that is
	   not already a valid (configured) set.
	   NOTE(review): assumes raidget() never returns NULL here --
	   confirm against raidget()'s definition. */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: release the unit we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3835
3836 void
3837 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3838 {
3839 struct buf *bp;
3840 struct raid_softc *rs;
3841
3842 bp = (struct buf *)desc->bp;
3843 rs = desc->raidPtr->softc;
3844 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3845 (bp->b_flags & B_READ));
3846 }
3847
/*
 * Initialize a pool for RAIDframe use: cap it at xmax items, then
 * pre-allocate and keep at least xmin items on hand.  prime before
 * setlowat so the low-water items exist immediately.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3857
3858 /*
3859 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3860 * if there is IO pending and if that IO could possibly be done for a
3861 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3862 * otherwise.
3863 *
3864 */
3865
3866 int
3867 rf_buf_queue_check(RF_Raid_t *raidPtr)
3868 {
3869 struct raid_softc *rs = raidPtr->softc;
3870 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3871 /* there is work to do */
3872 return 0;
3873 }
3874 /* default is nothing to do */
3875 return 1;
3876 }
3877
3878 int
3879 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3880 {
3881 uint64_t numsecs;
3882 unsigned secsize;
3883 int error;
3884
3885 error = getdisksize(vp, &numsecs, &secsize);
3886 if (error == 0) {
3887 diskPtr->blockSize = secsize;
3888 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3889 diskPtr->partitionSize = numsecs;
3890 return 0;
3891 }
3892 return error;
3893 }
3894
/* Autoconf match: raid pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3900
/* Autoconf attach: nothing to do; real setup happens in raidinit(). */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3906
3907
3908 static int
3909 raid_detach(device_t self, int flags)
3910 {
3911 int error;
3912 struct raid_softc *rs = raidget(device_unit(self));
3913
3914 if (rs == NULL)
3915 return ENXIO;
3916
3917 if ((error = raidlock(rs)) != 0)
3918 return (error);
3919
3920 error = raid_detach_unlocked(rs);
3921
3922 raidunlock(rs);
3923
3924 /* XXXkd: raidput(rs) ??? */
3925
3926 return error;
3927 }
3928
3929 static void
3930 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3931 {
3932 struct disk_geom *dg = &rs->sc_dkdev.dk_geom;
3933
3934 memset(dg, 0, sizeof(*dg));
3935
3936 dg->dg_secperunit = raidPtr->totalSectors;
3937 dg->dg_secsize = raidPtr->bytesPerSector;
3938 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3939 dg->dg_ntracks = 4 * raidPtr->numCol;
3940
3941 disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
3942 }
3943
3944 /*
3945 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3946 * We end up returning whatever error was returned by the first cache flush
3947 * that fails.
3948 */
3949
3950 int
3951 rf_sync_component_caches(RF_Raid_t *raidPtr)
3952 {
3953 int c, sparecol;
3954 int e,error;
3955 int force = 1;
3956
3957 error = 0;
3958 for (c = 0; c < raidPtr->numCol; c++) {
3959 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3960 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3961 &force, FWRITE, NOCRED);
3962 if (e) {
3963 if (e != ENODEV)
3964 printf("raid%d: cache flush to component %s failed.\n",
3965 raidPtr->raidid, raidPtr->Disks[c].devname);
3966 if (error == 0) {
3967 error = e;
3968 }
3969 }
3970 }
3971 }
3972
3973 for( c = 0; c < raidPtr->numSpare ; c++) {
3974 sparecol = raidPtr->numCol + c;
3975 /* Need to ensure that the reconstruct actually completed! */
3976 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3977 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3978 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3979 if (e) {
3980 if (e != ENODEV)
3981 printf("raid%d: cache flush to component %s failed.\n",
3982 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3983 if (error == 0) {
3984 error = e;
3985 }
3986 }
3987 }
3988 }
3989 return error;
3990 }
3991