rf_netbsdkintf.c revision 1.326 1 /* $NetBSD: rf_netbsdkintf.c,v 1.326 2015/12/08 20:36:15 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.326 2015/12/08 20:36:15 christos Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #include "ioconf.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
164 static rf_declare_mutex2(rf_sparet_wait_mutex);
165 static rf_declare_cond2(rf_sparet_wait_cv);
166 static rf_declare_cond2(rf_sparet_resp_cv);
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172 #endif
173
174 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf *);
178 static void InitBP(struct buf *, struct vnode *, unsigned,
179 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
180 void *, int, struct proc *);
181 struct raid_softc;
182 static void raidinit(struct raid_softc *);
183
184 static int raid_match(device_t, cfdata_t, void *);
185 static void raid_attach(device_t, device_t, void *);
186 static int raid_detach(device_t, int);
187
188 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
189 daddr_t, daddr_t);
190 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
191 daddr_t, daddr_t, int);
192
193 static int raidwrite_component_label(unsigned,
194 dev_t, struct vnode *, RF_ComponentLabel_t *);
195 static int raidread_component_label(unsigned,
196 dev_t, struct vnode *, RF_ComponentLabel_t *);
197
198
199 static dev_type_open(raidopen);
200 static dev_type_close(raidclose);
201 static dev_type_read(raidread);
202 static dev_type_write(raidwrite);
203 static dev_type_ioctl(raidioctl);
204 static dev_type_strategy(raidstrategy);
205 static dev_type_dump(raiddump);
206 static dev_type_size(raidsize);
207
/* Block-device switch: entry points for the raidN block device nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
218
/* Character-device switch: raw (rraidN) device entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
233
/* disk(9) driver glue handed to disk_init() for each raid unit. */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
238
/*
 * Per-unit software state for a raidN pseudo-device.  Allocated on
 * demand by raidcreate() and linked onto the global `raids' list.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_unit;	/* raidN unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global `raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
258
259 #define raidunit(x) DISKUNIT(x)
260
261 extern struct cfdriver raid_cd;
262 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
263 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
264 DVF_DETACH_SHUTDOWN);
265
266 /*
267 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
268 * Be aware that large numbers can allow the driver to consume a lot of
269 * kernel memory, especially on writes, and in degraded mode reads.
270 *
271 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
272 * a single 64K write will typically require 64K for the old data,
273 * 64K for the old parity, and 64K for the new parity, for a total
274 * of 192K (if the parity buffer is not re-used immediately).
275 * Even it if is used immediately, that's still 128K, which when multiplied
276 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
277 *
278 * Now in degraded mode, for example, a 64K read on the above setup may
279 * require data reconstruction, which will require *all* of the 4 remaining
280 * disks to participate -- 4 * 32K/disk == 128K again.
281 */
282
283 #ifndef RAIDOUTSTANDING
284 #define RAIDOUTSTANDING 6
285 #endif
286
287 #define RAIDLABELDEV(dev) \
288 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
289
290 /* declared here, and made public, for the benefit of KVM stuff.. */
291
292 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
293 struct disklabel *);
294 static void raidgetdisklabel(dev_t);
295 static void raidmakedisklabel(struct raid_softc *);
296
297 static int raidlock(struct raid_softc *);
298 static void raidunlock(struct raid_softc *);
299
300 static int raid_detach_unlocked(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
304
305 void rf_ReconThread(struct rf_recon_req *);
306 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
307 void rf_CopybackThread(RF_Raid_t *raidPtr);
308 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
309 int rf_autoconfig(device_t);
310 void rf_buildroothack(RF_ConfigSet_t *);
311
312 RF_AutoConfig_t *rf_find_raid_components(void);
313 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
314 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
315 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
316 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
317 int rf_set_autoconfig(RF_Raid_t *, int);
318 int rf_set_rootpartition(RF_Raid_t *, int);
319 void rf_release_all_vps(RF_ConfigSet_t *);
320 void rf_cleanup_config_set(RF_ConfigSet_t *);
321 int rf_have_enough_components(RF_ConfigSet_t *);
322 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
323 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
324
325 /*
326 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
327 * Note that this is overridden by having RAID_AUTOCONFIG as an option
328 * in the kernel config file.
329 */
330 #ifdef RAID_AUTOCONFIG
331 int raidautoconfig = 1;
332 #else
333 int raidautoconfig = 0;
334 #endif
335 static bool raidautoconfigdone = false;
336
337 struct RF_Pools_s rf_pools;
338
339 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
340 static kmutex_t raid_lock;
341
342 static struct raid_softc *
343 raidcreate(int unit) {
344 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
345 if (sc == NULL) {
346 #ifdef DIAGNOSTIC
347 printf("%s: out of memory\n", __func__);
348 #endif
349 return NULL;
350 }
351 sc->sc_unit = unit;
352 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
353 return sc;
354 }
355
/*
 * Free the buffer queue and the softc itself.  The softc must already
 * have been removed from the global `raids' list (see raidput()).
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
361
362 static struct raid_softc *
363 raidget(int unit) {
364 struct raid_softc *sc;
365 if (unit < 0) {
366 #ifdef DIAGNOSTIC
367 panic("%s: unit %d!", __func__, unit);
368 #endif
369 return NULL;
370 }
371 mutex_enter(&raid_lock);
372 LIST_FOREACH(sc, &raids, sc_link) {
373 if (sc->sc_unit == unit) {
374 mutex_exit(&raid_lock);
375 return sc;
376 }
377 }
378 mutex_exit(&raid_lock);
379 if ((sc = raidcreate(unit)) == NULL)
380 return NULL;
381 mutex_enter(&raid_lock);
382 LIST_INSERT_HEAD(&raids, sc, sc_link);
383 mutex_exit(&raid_lock);
384 return sc;
385 }
386
/*
 * Unlink a softc from the global `raids' list and destroy it.
 * raid_lock covers only the list manipulation; the teardown itself
 * happens after the lock is dropped.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
394
/*
 * raidattach: pseudo-device attach hook (see ioconf).  Initializes
 * global RAIDframe state, attaches the autoconf glue, and registers a
 * config finalizer so component auto-detection runs only after all
 * real hardware devices have been found.  `num' is unused; units are
 * created on demand by raidget().
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
427
428 int
429 rf_autoconfig(device_t self)
430 {
431 RF_AutoConfig_t *ac_list;
432 RF_ConfigSet_t *config_sets;
433
434 if (!raidautoconfig || raidautoconfigdone == true)
435 return (0);
436
437 /* XXX This code can only be run once. */
438 raidautoconfigdone = true;
439
440 #ifdef __HAVE_CPU_BOOTCONF
441 /*
442 * 0. find the boot device if needed first so we can use it later
443 * this needs to be done before we autoconfigure any raid sets,
444 * because if we use wedges we are not going to be able to open
445 * the boot device later
446 */
447 if (booted_device == NULL)
448 cpu_bootconf();
449 #endif
450 /* 1. locate all RAID components on the system */
451 aprint_debug("Searching for RAID components...\n");
452 ac_list = rf_find_raid_components();
453
454 /* 2. Sort them into their respective sets. */
455 config_sets = rf_create_auto_sets(ac_list);
456
457 /*
458 * 3. Evaluate each set and configure the valid ones.
459 * This gets done in rf_buildroothack().
460 */
461 rf_buildroothack(config_sets);
462
463 return 1;
464 }
465
466 static int
467 rf_containsboot(RF_Raid_t *r, device_t bdv) {
468 const char *bootname = device_xname(bdv);
469 size_t len = strlen(bootname);
470
471 for (int col = 0; col < r->numCol; col++) {
472 const char *devname = r->Disks[col].devname;
473 devname += sizeof("/dev/") - 1;
474 if (strncmp(devname, "dk", 2) == 0) {
475 const char *parent =
476 dkwedge_get_parent_name(r->Disks[col].dev);
477 if (parent != NULL)
478 devname = parent;
479 }
480 if (strncmp(devname, bootname, len) == 0) {
481 struct raid_softc *sc = r->softc;
482 aprint_debug("raid%d includes boot device %s\n",
483 sc->sc_unit, devname);
484 return 1;
485 }
486 }
487 return 0;
488 }
489
/*
 * Walk the auto-detected configuration sets: configure those that are
 * complete and marked autoconfigure, release the resources of the
 * rest.  Then, if exactly one configured set is root-eligible, make it
 * the boot device (unless the user hardwired root via rootspec).  With
 * several candidates, fall back to the set containing the MD-detected
 * boot device, or ask the user (RB_ASKNAME).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of root-eligible configured sets */
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* sizeof is compile-time only: cset is NULL here,
			   but it is never dereferenced at run time */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* adopt the candidate if nothing else was booted from,
		   the set is forced root, or it contains the boot device */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0; /* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* narrow the field to sets that contain the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0; /* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
600
601 static int
602 raidsize(dev_t dev)
603 {
604 struct raid_softc *rs;
605 struct disklabel *lp;
606 int part, unit, omask, size;
607
608 unit = raidunit(dev);
609 if ((rs = raidget(unit)) == NULL)
610 return -1;
611 if ((rs->sc_flags & RAIDF_INITED) == 0)
612 return (-1);
613
614 part = DISKPART(dev);
615 omask = rs->sc_dkdev.dk_openmask & (1 << part);
616 lp = rs->sc_dkdev.dk_label;
617
618 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
619 return (-1);
620
621 if (lp->d_partitions[part].p_fstype != FS_SWAP)
622 size = -1;
623 else
624 size = lp->d_partitions[part].p_size *
625 (lp->d_secsize / DEV_BSIZE);
626
627 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
628 return (-1);
629
630 return (size);
631
632 }
633
634 static int
635 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
636 {
637 int unit = raidunit(dev);
638 struct raid_softc *rs;
639 const struct bdevsw *bdev;
640 struct disklabel *lp;
641 RF_Raid_t *raidPtr;
642 daddr_t offset;
643 int part, c, sparecol, j, scol, dumpto;
644 int error = 0;
645
646 if ((rs = raidget(unit)) == NULL)
647 return ENXIO;
648
649 raidPtr = &rs->sc_r;
650
651 if ((rs->sc_flags & RAIDF_INITED) == 0)
652 return ENXIO;
653
654 /* we only support dumping to RAID 1 sets */
655 if (raidPtr->Layout.numDataCol != 1 ||
656 raidPtr->Layout.numParityCol != 1)
657 return EINVAL;
658
659
660 if ((error = raidlock(rs)) != 0)
661 return error;
662
663 if (size % DEV_BSIZE != 0) {
664 error = EINVAL;
665 goto out;
666 }
667
668 if (blkno + size / DEV_BSIZE > rs->sc_size) {
669 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
670 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
671 size / DEV_BSIZE, rs->sc_size);
672 error = EINVAL;
673 goto out;
674 }
675
676 part = DISKPART(dev);
677 lp = rs->sc_dkdev.dk_label;
678 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
679
680 /* figure out what device is alive.. */
681
682 /*
683 Look for a component to dump to. The preference for the
684 component to dump to is as follows:
685 1) the master
686 2) a used_spare of the master
687 3) the slave
688 4) a used_spare of the slave
689 */
690
691 dumpto = -1;
692 for (c = 0; c < raidPtr->numCol; c++) {
693 if (raidPtr->Disks[c].status == rf_ds_optimal) {
694 /* this might be the one */
695 dumpto = c;
696 break;
697 }
698 }
699
700 /*
701 At this point we have possibly selected a live master or a
702 live slave. We now check to see if there is a spared
703 master (or a spared slave), if we didn't find a live master
704 or a live slave.
705 */
706
707 for (c = 0; c < raidPtr->numSpare; c++) {
708 sparecol = raidPtr->numCol + c;
709 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
710 /* How about this one? */
711 scol = -1;
712 for(j=0;j<raidPtr->numCol;j++) {
713 if (raidPtr->Disks[j].spareCol == sparecol) {
714 scol = j;
715 break;
716 }
717 }
718 if (scol == 0) {
719 /*
720 We must have found a spared master!
721 We'll take that over anything else
722 found so far. (We couldn't have
723 found a real master before, since
724 this is a used spare, and it's
725 saying that it's replacing the
726 master.) On reboot (with
727 autoconfiguration turned on)
728 sparecol will become the 1st
729 component (component0) of this set.
730 */
731 dumpto = sparecol;
732 break;
733 } else if (scol != -1) {
734 /*
735 Must be a spared slave. We'll dump
736 to that if we havn't found anything
737 else so far.
738 */
739 if (dumpto == -1)
740 dumpto = sparecol;
741 }
742 }
743 }
744
745 if (dumpto == -1) {
746 /* we couldn't find any live components to dump to!?!?
747 */
748 error = EINVAL;
749 goto out;
750 }
751
752 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
753
754 /*
755 Note that blkno is relative to this particular partition.
756 By adding the offset of this partition in the RAID
757 set, and also adding RF_PROTECTED_SECTORS, we get a
758 value that is relative to the partition used for the
759 underlying component.
760 */
761
762 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
763 blkno + offset, va, size);
764
765 out:
766 raidunlock(rs);
767
768 return error;
769 }
770
/*
 * raidopen: open one partition of a raid device (block or character).
 * Validates the partition against the disklabel, records the open in
 * the per-mode open masks, and on the first open of a configured set
 * marks all components dirty so an unclean shutdown can be detected.
 * Fails with EBUSY while the unit is shutting down, or if wedges are
 * in use and a non-raw partition is requested.
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* unit is being torn down: refuse new opens */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* (re)read the disklabel on the first open of a configured set */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
856
/*
 * raidclose: close one partition of a raid device.  Clears the
 * partition's bit in the per-mode open mask; when the last partition
 * of a configured set is closed, the component labels get a final
 * "clean" update.  The set itself stays configured - detaching is
 * handled separately (see raid_detach).
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
906
/*
 * raidstrategy: block I/O entry point.  Validates the request against
 * the unit state and bounds (disklabel for named partitions, total
 * device size for RAW_PART), then queues the buffer and wakes the
 * I/O thread via iodone_cv; the transfer completes asynchronously.
 * On any validation failure the buffer is finished immediately with
 * b_error set.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* convert totalSectors (native sector units) to DEV_BSIZE units */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
977
978 /* ARGSUSED */
979 static int
980 raidread(dev_t dev, struct uio *uio, int flags)
981 {
982 int unit = raidunit(dev);
983 struct raid_softc *rs;
984
985 if ((rs = raidget(unit)) == NULL)
986 return ENXIO;
987
988 if ((rs->sc_flags & RAIDF_INITED) == 0)
989 return (ENXIO);
990
991 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
992
993 }
994
995 /* ARGSUSED */
996 static int
997 raidwrite(dev_t dev, struct uio *uio, int flags)
998 {
999 int unit = raidunit(dev);
1000 struct raid_softc *rs;
1001
1002 if ((rs = raidget(unit)) == NULL)
1003 return ENXIO;
1004
1005 if ((rs->sc_flags & RAIDF_INITED) == 0)
1006 return (ENXIO);
1007
1008 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1009
1010 }
1011
/*
 * Shut down and detach a raid unit; the caller must hold the unit
 * lock (raidlock).  Fails with EBUSY if any partition is still open.
 * For a configured set, rf_Shutdown() must succeed before the disk(9)
 * structures are torn down.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1043
1044 static int
1045 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1046 {
1047 int unit = raidunit(dev);
1048 int error = 0;
1049 int part, pmask, s;
1050 cfdata_t cf;
1051 struct raid_softc *rs;
1052 RF_Config_t *k_cfg, *u_cfg;
1053 RF_Raid_t *raidPtr;
1054 RF_RaidDisk_t *diskPtr;
1055 RF_AccTotals_t *totals;
1056 RF_DeviceConfig_t *d_cfg, **ucfgp;
1057 u_char *specific_buf;
1058 int retcode = 0;
1059 int column;
1060 /* int raidid; */
1061 struct rf_recon_req *rrcopy, *rr;
1062 RF_ComponentLabel_t *clabel;
1063 RF_ComponentLabel_t *ci_label;
1064 RF_ComponentLabel_t **clabel_ptr;
1065 RF_SingleComponent_t *sparePtr,*componentPtr;
1066 RF_SingleComponent_t component;
1067 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1068 int i, j, d;
1069 #ifdef __HAVE_OLD_DISKLABEL
1070 struct disklabel newlabel;
1071 #endif
1072
1073 if ((rs = raidget(unit)) == NULL)
1074 return ENXIO;
1075 raidPtr = &rs->sc_r;
1076
1077 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1078 (int) DISKPART(dev), (int) unit, cmd));
1079
1080 /* Must be open for writes for these commands... */
1081 switch (cmd) {
1082 #ifdef DIOCGSECTORSIZE
1083 case DIOCGSECTORSIZE:
1084 *(u_int *)data = raidPtr->bytesPerSector;
1085 return 0;
1086 case DIOCGMEDIASIZE:
1087 *(off_t *)data =
1088 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1089 return 0;
1090 #endif
1091 case DIOCSDINFO:
1092 case DIOCWDINFO:
1093 #ifdef __HAVE_OLD_DISKLABEL
1094 case ODIOCWDINFO:
1095 case ODIOCSDINFO:
1096 #endif
1097 case DIOCWLABEL:
1098 case DIOCAWEDGE:
1099 case DIOCDWEDGE:
1100 case DIOCMWEDGES:
1101 case DIOCSSTRATEGY:
1102 if ((flag & FWRITE) == 0)
1103 return (EBADF);
1104 }
1105
1106 /* Must be initialized for these... */
1107 switch (cmd) {
1108 case DIOCGDINFO:
1109 case DIOCSDINFO:
1110 case DIOCWDINFO:
1111 #ifdef __HAVE_OLD_DISKLABEL
1112 case ODIOCGDINFO:
1113 case ODIOCWDINFO:
1114 case ODIOCSDINFO:
1115 case ODIOCGDEFLABEL:
1116 #endif
1117 case DIOCGPARTINFO:
1118 case DIOCWLABEL:
1119 case DIOCGDEFLABEL:
1120 case DIOCAWEDGE:
1121 case DIOCDWEDGE:
1122 case DIOCLWEDGES:
1123 case DIOCMWEDGES:
1124 case DIOCCACHESYNC:
1125 case RAIDFRAME_SHUTDOWN:
1126 case RAIDFRAME_REWRITEPARITY:
1127 case RAIDFRAME_GET_INFO:
1128 case RAIDFRAME_RESET_ACCTOTALS:
1129 case RAIDFRAME_GET_ACCTOTALS:
1130 case RAIDFRAME_KEEP_ACCTOTALS:
1131 case RAIDFRAME_GET_SIZE:
1132 case RAIDFRAME_FAIL_DISK:
1133 case RAIDFRAME_COPYBACK:
1134 case RAIDFRAME_CHECK_RECON_STATUS:
1135 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1136 case RAIDFRAME_GET_COMPONENT_LABEL:
1137 case RAIDFRAME_SET_COMPONENT_LABEL:
1138 case RAIDFRAME_ADD_HOT_SPARE:
1139 case RAIDFRAME_REMOVE_HOT_SPARE:
1140 case RAIDFRAME_INIT_LABELS:
1141 case RAIDFRAME_REBUILD_IN_PLACE:
1142 case RAIDFRAME_CHECK_PARITY:
1143 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1144 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1145 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1146 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1147 case RAIDFRAME_SET_AUTOCONFIG:
1148 case RAIDFRAME_SET_ROOT:
1149 case RAIDFRAME_DELETE_COMPONENT:
1150 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1151 case RAIDFRAME_PARITYMAP_STATUS:
1152 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1153 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1154 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1155 case DIOCGSTRATEGY:
1156 case DIOCSSTRATEGY:
1157 if ((rs->sc_flags & RAIDF_INITED) == 0)
1158 return (ENXIO);
1159 }
1160
1161 switch (cmd) {
1162 #ifdef COMPAT_50
1163 case RAIDFRAME_GET_INFO50:
1164 return rf_get_info50(raidPtr, data);
1165
1166 case RAIDFRAME_CONFIGURE50:
1167 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1168 return retcode;
1169 goto config;
1170 #endif
1171 /* configure the system */
1172 case RAIDFRAME_CONFIGURE:
1173
1174 if (raidPtr->valid) {
1175 /* There is a valid RAID set running on this unit! */
1176 printf("raid%d: Device already configured!\n",unit);
1177 return(EINVAL);
1178 }
1179
1180 /* copy-in the configuration information */
1181 /* data points to a pointer to the configuration structure */
1182
1183 u_cfg = *((RF_Config_t **) data);
1184 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1185 if (k_cfg == NULL) {
1186 return (ENOMEM);
1187 }
1188 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1189 if (retcode) {
1190 RF_Free(k_cfg, sizeof(RF_Config_t));
1191 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1192 retcode));
1193 return (retcode);
1194 }
1195 goto config;
1196 config:
1197 /* allocate a buffer for the layout-specific data, and copy it
1198 * in */
1199 if (k_cfg->layoutSpecificSize) {
1200 if (k_cfg->layoutSpecificSize > 10000) {
1201 /* sanity check */
1202 RF_Free(k_cfg, sizeof(RF_Config_t));
1203 return (EINVAL);
1204 }
1205 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1206 (u_char *));
1207 if (specific_buf == NULL) {
1208 RF_Free(k_cfg, sizeof(RF_Config_t));
1209 return (ENOMEM);
1210 }
1211 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1212 k_cfg->layoutSpecificSize);
1213 if (retcode) {
1214 RF_Free(k_cfg, sizeof(RF_Config_t));
1215 RF_Free(specific_buf,
1216 k_cfg->layoutSpecificSize);
1217 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1218 retcode));
1219 return (retcode);
1220 }
1221 } else
1222 specific_buf = NULL;
1223 k_cfg->layoutSpecific = specific_buf;
1224
1225 /* should do some kind of sanity check on the configuration.
1226 * Store the sum of all the bytes in the last byte? */
1227
1228 /* configure the system */
1229
1230 /*
1231 * Clear the entire RAID descriptor, just to make sure
1232 * there is no stale data left in the case of a
1233 * reconfiguration
1234 */
1235 memset(raidPtr, 0, sizeof(*raidPtr));
1236 raidPtr->softc = rs;
1237 raidPtr->raidid = unit;
1238
1239 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1240
1241 if (retcode == 0) {
1242
1243 /* allow this many simultaneous IO's to
1244 this RAID device */
1245 raidPtr->openings = RAIDOUTSTANDING;
1246
1247 raidinit(rs);
1248 rf_markalldirty(raidPtr);
1249 }
1250 /* free the buffers. No return code here. */
1251 if (k_cfg->layoutSpecificSize) {
1252 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1253 }
1254 RF_Free(k_cfg, sizeof(RF_Config_t));
1255
1256 return (retcode);
1257
1258 /* shutdown the system */
1259 case RAIDFRAME_SHUTDOWN:
1260
1261 part = DISKPART(dev);
1262 pmask = (1 << part);
1263
1264 if ((error = raidlock(rs)) != 0)
1265 return (error);
1266
1267 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1268 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1269 (rs->sc_dkdev.dk_copenmask & pmask)))
1270 retcode = EBUSY;
1271 else {
1272 rs->sc_flags |= RAIDF_SHUTDOWN;
1273 rs->sc_dkdev.dk_copenmask &= ~pmask;
1274 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1275 rs->sc_dkdev.dk_openmask &= ~pmask;
1276 retcode = 0;
1277 }
1278
1279 raidunlock(rs);
1280
1281 if (retcode != 0)
1282 return retcode;
1283
1284 /* free the pseudo device attach bits */
1285
1286 cf = device_cfdata(rs->sc_dev);
1287 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1288 free(cf, M_RAIDFRAME);
1289
1290 return (retcode);
1291 case RAIDFRAME_GET_COMPONENT_LABEL:
1292 clabel_ptr = (RF_ComponentLabel_t **) data;
1293 /* need to read the component label for the disk indicated
1294 by row,column in clabel */
1295
1296 /*
1297 * Perhaps there should be an option to skip the in-core
1298 * copy and hit the disk, as with disklabel(8).
1299 */
1300 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1301
1302 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1303
1304 if (retcode) {
1305 RF_Free(clabel, sizeof(*clabel));
1306 return retcode;
1307 }
1308
1309 clabel->row = 0; /* Don't allow looking at anything else.*/
1310
1311 column = clabel->column;
1312
1313 if ((column < 0) || (column >= raidPtr->numCol +
1314 raidPtr->numSpare)) {
1315 RF_Free(clabel, sizeof(*clabel));
1316 return EINVAL;
1317 }
1318
1319 RF_Free(clabel, sizeof(*clabel));
1320
1321 clabel = raidget_component_label(raidPtr, column);
1322
1323 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1324
1325 #if 0
1326 case RAIDFRAME_SET_COMPONENT_LABEL:
1327 clabel = (RF_ComponentLabel_t *) data;
1328
1329 /* XXX check the label for valid stuff... */
1330 /* Note that some things *should not* get modified --
1331 the user should be re-initing the labels instead of
1332 trying to patch things.
1333 */
1334
1335 raidid = raidPtr->raidid;
1336 #ifdef DEBUG
1337 printf("raid%d: Got component label:\n", raidid);
1338 printf("raid%d: Version: %d\n", raidid, clabel->version);
1339 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1340 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1341 printf("raid%d: Column: %d\n", raidid, clabel->column);
1342 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1343 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1344 printf("raid%d: Status: %d\n", raidid, clabel->status);
1345 #endif
1346 clabel->row = 0;
1347 column = clabel->column;
1348
1349 if ((column < 0) || (column >= raidPtr->numCol)) {
1350 return(EINVAL);
1351 }
1352
1353 /* XXX this isn't allowed to do anything for now :-) */
1354
1355 /* XXX and before it is, we need to fill in the rest
1356 of the fields!?!?!?! */
1357 memcpy(raidget_component_label(raidPtr, column),
1358 clabel, sizeof(*clabel));
1359 raidflush_component_label(raidPtr, column);
1360 return (0);
1361 #endif
1362
1363 case RAIDFRAME_INIT_LABELS:
1364 clabel = (RF_ComponentLabel_t *) data;
1365 /*
1366 we only want the serial number from
1367 the above. We get all the rest of the information
1368 from the config that was used to create this RAID
1369 set.
1370 */
1371
1372 raidPtr->serial_number = clabel->serial_number;
1373
1374 for(column=0;column<raidPtr->numCol;column++) {
1375 diskPtr = &raidPtr->Disks[column];
1376 if (!RF_DEAD_DISK(diskPtr->status)) {
1377 ci_label = raidget_component_label(raidPtr,
1378 column);
1379 /* Zeroing this is important. */
1380 memset(ci_label, 0, sizeof(*ci_label));
1381 raid_init_component_label(raidPtr, ci_label);
1382 ci_label->serial_number =
1383 raidPtr->serial_number;
1384 ci_label->row = 0; /* we dont' pretend to support more */
1385 rf_component_label_set_partitionsize(ci_label,
1386 diskPtr->partitionSize);
1387 ci_label->column = column;
1388 raidflush_component_label(raidPtr, column);
1389 }
1390 /* XXXjld what about the spares? */
1391 }
1392
1393 return (retcode);
1394 case RAIDFRAME_SET_AUTOCONFIG:
1395 d = rf_set_autoconfig(raidPtr, *(int *) data);
1396 printf("raid%d: New autoconfig value is: %d\n",
1397 raidPtr->raidid, d);
1398 *(int *) data = d;
1399 return (retcode);
1400
1401 case RAIDFRAME_SET_ROOT:
1402 d = rf_set_rootpartition(raidPtr, *(int *) data);
1403 printf("raid%d: New rootpartition value is: %d\n",
1404 raidPtr->raidid, d);
1405 *(int *) data = d;
1406 return (retcode);
1407
1408 /* initialize all parity */
1409 case RAIDFRAME_REWRITEPARITY:
1410
1411 if (raidPtr->Layout.map->faultsTolerated == 0) {
1412 /* Parity for RAID 0 is trivially correct */
1413 raidPtr->parity_good = RF_RAID_CLEAN;
1414 return(0);
1415 }
1416
1417 if (raidPtr->parity_rewrite_in_progress == 1) {
1418 /* Re-write is already in progress! */
1419 return(EINVAL);
1420 }
1421
1422 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1423 rf_RewriteParityThread,
1424 raidPtr,"raid_parity");
1425 return (retcode);
1426
1427
1428 case RAIDFRAME_ADD_HOT_SPARE:
1429 sparePtr = (RF_SingleComponent_t *) data;
1430 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1431 retcode = rf_add_hot_spare(raidPtr, &component);
1432 return(retcode);
1433
1434 case RAIDFRAME_REMOVE_HOT_SPARE:
1435 return(retcode);
1436
1437 case RAIDFRAME_DELETE_COMPONENT:
1438 componentPtr = (RF_SingleComponent_t *)data;
1439 memcpy( &component, componentPtr,
1440 sizeof(RF_SingleComponent_t));
1441 retcode = rf_delete_component(raidPtr, &component);
1442 return(retcode);
1443
1444 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1445 componentPtr = (RF_SingleComponent_t *)data;
1446 memcpy( &component, componentPtr,
1447 sizeof(RF_SingleComponent_t));
1448 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1449 return(retcode);
1450
1451 case RAIDFRAME_REBUILD_IN_PLACE:
1452
1453 if (raidPtr->Layout.map->faultsTolerated == 0) {
1454 /* Can't do this on a RAID 0!! */
1455 return(EINVAL);
1456 }
1457
1458 if (raidPtr->recon_in_progress == 1) {
1459 /* a reconstruct is already in progress! */
1460 return(EINVAL);
1461 }
1462
1463 componentPtr = (RF_SingleComponent_t *) data;
1464 memcpy( &component, componentPtr,
1465 sizeof(RF_SingleComponent_t));
1466 component.row = 0; /* we don't support any more */
1467 column = component.column;
1468
1469 if ((column < 0) || (column >= raidPtr->numCol)) {
1470 return(EINVAL);
1471 }
1472
1473 rf_lock_mutex2(raidPtr->mutex);
1474 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1475 (raidPtr->numFailures > 0)) {
1476 /* XXX 0 above shouldn't be constant!!! */
1477 /* some component other than this has failed.
1478 Let's not make things worse than they already
1479 are... */
1480 printf("raid%d: Unable to reconstruct to disk at:\n",
1481 raidPtr->raidid);
1482 printf("raid%d: Col: %d Too many failures.\n",
1483 raidPtr->raidid, column);
1484 rf_unlock_mutex2(raidPtr->mutex);
1485 return (EINVAL);
1486 }
1487 if (raidPtr->Disks[column].status ==
1488 rf_ds_reconstructing) {
1489 printf("raid%d: Unable to reconstruct to disk at:\n",
1490 raidPtr->raidid);
1491 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1492
1493 rf_unlock_mutex2(raidPtr->mutex);
1494 return (EINVAL);
1495 }
1496 if (raidPtr->Disks[column].status == rf_ds_spared) {
1497 rf_unlock_mutex2(raidPtr->mutex);
1498 return (EINVAL);
1499 }
1500 rf_unlock_mutex2(raidPtr->mutex);
1501
1502 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1503 if (rrcopy == NULL)
1504 return(ENOMEM);
1505
1506 rrcopy->raidPtr = (void *) raidPtr;
1507 rrcopy->col = column;
1508
1509 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1510 rf_ReconstructInPlaceThread,
1511 rrcopy,"raid_reconip");
1512 return(retcode);
1513
1514 case RAIDFRAME_GET_INFO:
1515 if (!raidPtr->valid)
1516 return (ENODEV);
1517 ucfgp = (RF_DeviceConfig_t **) data;
1518 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1519 (RF_DeviceConfig_t *));
1520 if (d_cfg == NULL)
1521 return (ENOMEM);
1522 d_cfg->rows = 1; /* there is only 1 row now */
1523 d_cfg->cols = raidPtr->numCol;
1524 d_cfg->ndevs = raidPtr->numCol;
1525 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1526 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1527 return (ENOMEM);
1528 }
1529 d_cfg->nspares = raidPtr->numSpare;
1530 if (d_cfg->nspares >= RF_MAX_DISKS) {
1531 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1532 return (ENOMEM);
1533 }
1534 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1535 d = 0;
1536 for (j = 0; j < d_cfg->cols; j++) {
1537 d_cfg->devs[d] = raidPtr->Disks[j];
1538 d++;
1539 }
1540 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1541 d_cfg->spares[i] = raidPtr->Disks[j];
1542 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1543 /* XXX: raidctl(8) expects to see this as a used spare */
1544 d_cfg->spares[i].status = rf_ds_used_spare;
1545 }
1546 }
1547 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1548 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1549
1550 return (retcode);
1551
1552 case RAIDFRAME_CHECK_PARITY:
1553 *(int *) data = raidPtr->parity_good;
1554 return (0);
1555
1556 case RAIDFRAME_PARITYMAP_STATUS:
1557 if (rf_paritymap_ineligible(raidPtr))
1558 return EINVAL;
1559 rf_paritymap_status(raidPtr->parity_map,
1560 (struct rf_pmstat *)data);
1561 return 0;
1562
1563 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1564 if (rf_paritymap_ineligible(raidPtr))
1565 return EINVAL;
1566 if (raidPtr->parity_map == NULL)
1567 return ENOENT; /* ??? */
1568 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1569 (struct rf_pmparams *)data, 1))
1570 return EINVAL;
1571 return 0;
1572
1573 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1574 if (rf_paritymap_ineligible(raidPtr))
1575 return EINVAL;
1576 *(int *) data = rf_paritymap_get_disable(raidPtr);
1577 return 0;
1578
1579 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1580 if (rf_paritymap_ineligible(raidPtr))
1581 return EINVAL;
1582 rf_paritymap_set_disable(raidPtr, *(int *)data);
1583 /* XXX should errors be passed up? */
1584 return 0;
1585
1586 case RAIDFRAME_RESET_ACCTOTALS:
1587 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1588 return (0);
1589
1590 case RAIDFRAME_GET_ACCTOTALS:
1591 totals = (RF_AccTotals_t *) data;
1592 *totals = raidPtr->acc_totals;
1593 return (0);
1594
1595 case RAIDFRAME_KEEP_ACCTOTALS:
1596 raidPtr->keep_acc_totals = *(int *)data;
1597 return (0);
1598
1599 case RAIDFRAME_GET_SIZE:
1600 *(int *) data = raidPtr->totalSectors;
1601 return (0);
1602
1603 /* fail a disk & optionally start reconstruction */
1604 case RAIDFRAME_FAIL_DISK:
1605
1606 if (raidPtr->Layout.map->faultsTolerated == 0) {
1607 /* Can't do this on a RAID 0!! */
1608 return(EINVAL);
1609 }
1610
1611 rr = (struct rf_recon_req *) data;
1612 rr->row = 0;
1613 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1614 return (EINVAL);
1615
1616
1617 rf_lock_mutex2(raidPtr->mutex);
1618 if (raidPtr->status == rf_rs_reconstructing) {
1619 /* you can't fail a disk while we're reconstructing! */
1620 /* XXX wrong for RAID6 */
1621 rf_unlock_mutex2(raidPtr->mutex);
1622 return (EINVAL);
1623 }
1624 if ((raidPtr->Disks[rr->col].status ==
1625 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1626 /* some other component has failed. Let's not make
1627 things worse. XXX wrong for RAID6 */
1628 rf_unlock_mutex2(raidPtr->mutex);
1629 return (EINVAL);
1630 }
1631 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1632 /* Can't fail a spared disk! */
1633 rf_unlock_mutex2(raidPtr->mutex);
1634 return (EINVAL);
1635 }
1636 rf_unlock_mutex2(raidPtr->mutex);
1637
1638 /* make a copy of the recon request so that we don't rely on
1639 * the user's buffer */
1640 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1641 if (rrcopy == NULL)
1642 return(ENOMEM);
1643 memcpy(rrcopy, rr, sizeof(*rr));
1644 rrcopy->raidPtr = (void *) raidPtr;
1645
1646 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1647 rf_ReconThread,
1648 rrcopy,"raid_recon");
1649 return (0);
1650
1651 /* invoke a copyback operation after recon on whatever disk
1652 * needs it, if any */
1653 case RAIDFRAME_COPYBACK:
1654
1655 if (raidPtr->Layout.map->faultsTolerated == 0) {
1656 /* This makes no sense on a RAID 0!! */
1657 return(EINVAL);
1658 }
1659
1660 if (raidPtr->copyback_in_progress == 1) {
1661 /* Copyback is already in progress! */
1662 return(EINVAL);
1663 }
1664
1665 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1666 rf_CopybackThread,
1667 raidPtr,"raid_copyback");
1668 return (retcode);
1669
1670 /* return the percentage completion of reconstruction */
1671 case RAIDFRAME_CHECK_RECON_STATUS:
1672 if (raidPtr->Layout.map->faultsTolerated == 0) {
1673 /* This makes no sense on a RAID 0, so tell the
1674 user it's done. */
1675 *(int *) data = 100;
1676 return(0);
1677 }
1678 if (raidPtr->status != rf_rs_reconstructing)
1679 *(int *) data = 100;
1680 else {
1681 if (raidPtr->reconControl->numRUsTotal > 0) {
1682 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1683 } else {
1684 *(int *) data = 0;
1685 }
1686 }
1687 return (0);
1688 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1689 progressInfoPtr = (RF_ProgressInfo_t **) data;
1690 if (raidPtr->status != rf_rs_reconstructing) {
1691 progressInfo.remaining = 0;
1692 progressInfo.completed = 100;
1693 progressInfo.total = 100;
1694 } else {
1695 progressInfo.total =
1696 raidPtr->reconControl->numRUsTotal;
1697 progressInfo.completed =
1698 raidPtr->reconControl->numRUsComplete;
1699 progressInfo.remaining = progressInfo.total -
1700 progressInfo.completed;
1701 }
1702 retcode = copyout(&progressInfo, *progressInfoPtr,
1703 sizeof(RF_ProgressInfo_t));
1704 return (retcode);
1705
1706 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1707 if (raidPtr->Layout.map->faultsTolerated == 0) {
1708 /* This makes no sense on a RAID 0, so tell the
1709 user it's done. */
1710 *(int *) data = 100;
1711 return(0);
1712 }
1713 if (raidPtr->parity_rewrite_in_progress == 1) {
1714 *(int *) data = 100 *
1715 raidPtr->parity_rewrite_stripes_done /
1716 raidPtr->Layout.numStripe;
1717 } else {
1718 *(int *) data = 100;
1719 }
1720 return (0);
1721
1722 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1723 progressInfoPtr = (RF_ProgressInfo_t **) data;
1724 if (raidPtr->parity_rewrite_in_progress == 1) {
1725 progressInfo.total = raidPtr->Layout.numStripe;
1726 progressInfo.completed =
1727 raidPtr->parity_rewrite_stripes_done;
1728 progressInfo.remaining = progressInfo.total -
1729 progressInfo.completed;
1730 } else {
1731 progressInfo.remaining = 0;
1732 progressInfo.completed = 100;
1733 progressInfo.total = 100;
1734 }
1735 retcode = copyout(&progressInfo, *progressInfoPtr,
1736 sizeof(RF_ProgressInfo_t));
1737 return (retcode);
1738
1739 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1740 if (raidPtr->Layout.map->faultsTolerated == 0) {
1741 /* This makes no sense on a RAID 0 */
1742 *(int *) data = 100;
1743 return(0);
1744 }
1745 if (raidPtr->copyback_in_progress == 1) {
1746 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1747 raidPtr->Layout.numStripe;
1748 } else {
1749 *(int *) data = 100;
1750 }
1751 return (0);
1752
1753 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1754 progressInfoPtr = (RF_ProgressInfo_t **) data;
1755 if (raidPtr->copyback_in_progress == 1) {
1756 progressInfo.total = raidPtr->Layout.numStripe;
1757 progressInfo.completed =
1758 raidPtr->copyback_stripes_done;
1759 progressInfo.remaining = progressInfo.total -
1760 progressInfo.completed;
1761 } else {
1762 progressInfo.remaining = 0;
1763 progressInfo.completed = 100;
1764 progressInfo.total = 100;
1765 }
1766 retcode = copyout(&progressInfo, *progressInfoPtr,
1767 sizeof(RF_ProgressInfo_t));
1768 return (retcode);
1769
1770 /* the sparetable daemon calls this to wait for the kernel to
1771 * need a spare table. this ioctl does not return until a
1772 * spare table is needed. XXX -- calling mpsleep here in the
1773 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1774 * -- I should either compute the spare table in the kernel,
1775 * or have a different -- XXX XXX -- interface (a different
1776 * character device) for delivering the table -- XXX */
1777 #if 0
1778 case RAIDFRAME_SPARET_WAIT:
1779 rf_lock_mutex2(rf_sparet_wait_mutex);
1780 while (!rf_sparet_wait_queue)
1781 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1782 waitreq = rf_sparet_wait_queue;
1783 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1784 rf_unlock_mutex2(rf_sparet_wait_mutex);
1785
1786 /* structure assignment */
1787 *((RF_SparetWait_t *) data) = *waitreq;
1788
1789 RF_Free(waitreq, sizeof(*waitreq));
1790 return (0);
1791
1792 /* wakes up a process waiting on SPARET_WAIT and puts an error
1793 * code in it that will cause the dameon to exit */
1794 case RAIDFRAME_ABORT_SPARET_WAIT:
1795 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1796 waitreq->fcol = -1;
1797 rf_lock_mutex2(rf_sparet_wait_mutex);
1798 waitreq->next = rf_sparet_wait_queue;
1799 rf_sparet_wait_queue = waitreq;
1800 rf_broadcast_conf2(rf_sparet_wait_cv);
1801 rf_unlock_mutex2(rf_sparet_wait_mutex);
1802 return (0);
1803
1804 /* used by the spare table daemon to deliver a spare table
1805 * into the kernel */
1806 case RAIDFRAME_SEND_SPARET:
1807
1808 /* install the spare table */
1809 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1810
1811 /* respond to the requestor. the return status of the spare
1812 * table installation is passed in the "fcol" field */
1813 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1814 waitreq->fcol = retcode;
1815 rf_lock_mutex2(rf_sparet_wait_mutex);
1816 waitreq->next = rf_sparet_resp_queue;
1817 rf_sparet_resp_queue = waitreq;
1818 rf_broadcast_cond2(rf_sparet_resp_cv);
1819 rf_unlock_mutex2(rf_sparet_wait_mutex);
1820
1821 return (retcode);
1822 #endif
1823
1824 default:
1825 break; /* fall through to the os-specific code below */
1826
1827 }
1828
1829 if (!raidPtr->valid)
1830 return (EINVAL);
1831
1832 /*
1833 * Add support for "regular" device ioctls here.
1834 */
1835
1836 error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
1837 if (error != EPASSTHROUGH)
1838 return (error);
1839
1840 switch (cmd) {
1841 case DIOCWDINFO:
1842 case DIOCSDINFO:
1843 #ifdef __HAVE_OLD_DISKLABEL
1844 case ODIOCWDINFO:
1845 case ODIOCSDINFO:
1846 #endif
1847 {
1848 struct disklabel *lp;
1849 #ifdef __HAVE_OLD_DISKLABEL
1850 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1851 memset(&newlabel, 0, sizeof newlabel);
1852 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1853 lp = &newlabel;
1854 } else
1855 #endif
1856 lp = (struct disklabel *)data;
1857
1858 if ((error = raidlock(rs)) != 0)
1859 return (error);
1860
1861 rs->sc_flags |= RAIDF_LABELLING;
1862
1863 error = setdisklabel(rs->sc_dkdev.dk_label,
1864 lp, 0, rs->sc_dkdev.dk_cpulabel);
1865 if (error == 0) {
1866 if (cmd == DIOCWDINFO
1867 #ifdef __HAVE_OLD_DISKLABEL
1868 || cmd == ODIOCWDINFO
1869 #endif
1870 )
1871 error = writedisklabel(RAIDLABELDEV(dev),
1872 raidstrategy, rs->sc_dkdev.dk_label,
1873 rs->sc_dkdev.dk_cpulabel);
1874 }
1875 rs->sc_flags &= ~RAIDF_LABELLING;
1876
1877 raidunlock(rs);
1878
1879 if (error)
1880 return (error);
1881 break;
1882 }
1883
1884 case DIOCWLABEL:
1885 if (*(int *) data != 0)
1886 rs->sc_flags |= RAIDF_WLABEL;
1887 else
1888 rs->sc_flags &= ~RAIDF_WLABEL;
1889 break;
1890
1891 case DIOCGDEFLABEL:
1892 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1893 break;
1894
1895 #ifdef __HAVE_OLD_DISKLABEL
1896 case ODIOCGDEFLABEL:
1897 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1898 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1899 return ENOTTY;
1900 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1901 break;
1902 #endif
1903
1904 case DIOCCACHESYNC:
1905 return rf_sync_component_caches(raidPtr);
1906
1907 case DIOCGSTRATEGY:
1908 {
1909 struct disk_strategy *dks = (void *)data;
1910
1911 s = splbio();
1912 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1913 sizeof(dks->dks_name));
1914 splx(s);
1915 dks->dks_paramlen = 0;
1916
1917 return 0;
1918 }
1919
1920 case DIOCSSTRATEGY:
1921 {
1922 struct disk_strategy *dks = (void *)data;
1923 struct bufq_state *new;
1924 struct bufq_state *old;
1925
1926 if (dks->dks_param != NULL) {
1927 return EINVAL;
1928 }
1929 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1930 error = bufq_alloc(&new, dks->dks_name,
1931 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1932 if (error) {
1933 return error;
1934 }
1935 s = splbio();
1936 old = rs->buf_queue;
1937 bufq_move(new, old);
1938 rs->buf_queue = new;
1939 splx(s);
1940 bufq_free(old);
1941
1942 return 0;
1943 }
1944
1945 default:
1946 retcode = ENOTTY;
1947 }
1948 return (retcode);
1949
1950 }
1951
1952
1953 /* raidinit -- complete the rest of the initialization for the
1954 RAIDframe device. */
1955
1956
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/*
	 * Attach the pseudo device.  The cfdata is freed by the
	 * RAIDFRAME_SHUTDOWN path in raidioctl() after config_detach()
	 * succeeds.
	 */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* Attach failed: undo the INITED flag and release cf. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Publish the geometry, then look for wedges on the new disk. */
	rf_set_geometry(rs, raidPtr);

	dkwedge_discover(&rs->sc_dkdev);

}
2007 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2008 /* wake up the daemon & tell it to get us a spare table
2009 * XXX
2010 * the entries in the queues should be tagged with the raidPtr
2011 * so that in the extremely rare case that two recons happen at once,
2012 * we know for which device were requesting a spare table
2013 * XXX
2014 *
2015 * XXX This code is not currently used. GO
2016 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Post the request on the wait queue and wake the daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/*
	 * Block until the daemon delivers a response; rf_wait_cond2()
	 * drops the mutex while sleeping and reacquires it on wakeup.
	 */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Pop the response off the queue; this is a different object
	 * from the request we posted above. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2040 #endif
2041
2042 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2043 * bp & passes it down.
2044 * any calls originating in the kernel must use non-blocking I/O
2045 * do some extra sanity checking to return "appropriate" error values for
2046 * certain conditions (to make some standard utilities work)
2047 *
2048 * Formerly known as: rf_DoAccessKernel
2049 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Labels are updated with the mutex dropped, then the
		 * failure count is decremented under the relocked mutex. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/*
	 * Drain the buffer queue while openings remain.  Invariant:
	 * raidPtr->mutex is held at the top of each iteration and
	 * released for the duration of the body.
	 */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* Non-raw partition: offset by the partition start. */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the set; the extra
		 * (sum < ...) comparisons catch arithmetic wraparound. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening before dispatching the I/O. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* Dispatch failed: complete the buffer with the
			 * error and keep draining the queue. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2167
2168
2169
2170
2171 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2172
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map RAIDframe's request type onto the buffer-cache direction flag. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember which queue the request belongs to; KernelWakeupFunc()
	 * relies on req->queue at completion time. */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately through the normal I/O
		 * completion path; no device I/O is issued. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Build the buf describing this transfer; KernelWakeupFunc
		 * is installed as the b_iodone callback with req as its
		 * private argument. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2246 /* this is the callback function associated with a I/O invoked from
2247 kernel code.
2248 */
static void
KernelWakeupFunc(struct buf *bp)
{
	/* b_iodone callback for component I/O issued by
	 * rf_DispatchKernelIO().  Records the result, possibly fails the
	 * component, and hands the request to the raidio thread via the
	 * iodone queue. */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* iodone_lock protects both the component status updates below and
	 * the iodone queue/condvar. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2315
2316
2317 /*
2318 * initialize a buf structure for doing an I/O in the kernel.
2319 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* Fill in a buf for a component transfer of numSect sectors
	 * starting at startSect.  cbFunc/cbArg become the b_iodone
	 * callback and its b_private argument. */
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Sector number -> byte offset -> DEV_BSIZE-unit block number. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		/* A zero-length transfer indicates a caller bug. */
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2344
2345 static void
2346 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2347 struct disklabel *lp)
2348 {
2349 memset(lp, 0, sizeof(*lp));
2350
2351 /* fabricate a label... */
2352 if (raidPtr->totalSectors > UINT32_MAX)
2353 lp->d_secperunit = UINT32_MAX;
2354 else
2355 lp->d_secperunit = raidPtr->totalSectors;
2356 lp->d_secsize = raidPtr->bytesPerSector;
2357 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2358 lp->d_ntracks = 4 * raidPtr->numCol;
2359 lp->d_ncylinders = raidPtr->totalSectors /
2360 (lp->d_nsectors * lp->d_ntracks);
2361 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2362
2363 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2364 lp->d_type = DKTYPE_RAID;
2365 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2366 lp->d_rpm = 3600;
2367 lp->d_interleave = 1;
2368 lp->d_flags = 0;
2369
2370 lp->d_partitions[RAW_PART].p_offset = 0;
2371 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2372 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2373 lp->d_npartitions = RAW_PART + 1;
2374
2375 lp->d_magic = DISKMAGIC;
2376 lp->d_magic2 = DISKMAGIC;
2377 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2378
2379 }
2380 /*
2381 * Read the disklabel from the raid device. If one is not present, fake one
2382 * up.
2383 */
static void
raidgetdisklabel(dev_t dev)
{
	/* Load the disklabel for the raid unit backing 'dev': start from
	 * a fabricated default, then try to read a real label from the
	 * device, faking one up if none is found. */
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No on-disk label found; synthesize one. */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX, so for huge sets
		 * only an over-size label is flagged. */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}
	}

}
2446 /*
2447 * Take care of things one might want to take care of in the event
2448 * that a disklabel isn't present.
2449 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	/* Adjust the already-fabricated default label for the case where
	 * no on-disk label exists, and recompute its checksum. */
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
2467 /*
2468 * Wait interruptibly for an exclusive lock.
2469 *
2470 * XXX
2471 * Several drivers do this; it should be abstracted and made MP-safe.
2472 * (Hmm... where have we seen this warning before :-> GO )
2473 */
static int
raidlock(struct raid_softc *rs)
{
	/* Acquire the softc's exclusive lock, sleeping interruptibly while
	 * another holder has it.  Returns 0 on success or the tsleep()
	 * error (e.g. on signal) without taking the lock. */
	int     error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		/* Ask the current holder to wake us on unlock. */
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
2488 /*
2489 * Unlock and wake up any waiters.
2490 */
2491 static void
2492 raidunlock(struct raid_softc *rs)
2493 {
2494
2495 rs->sc_flags &= ~RAIDF_LOCKED;
2496 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2497 rs->sc_flags &= ~RAIDF_WANTED;
2498 wakeup(rs);
2499 }
2500 }
2501
2502
2503 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2504 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2505 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2506
2507 static daddr_t
2508 rf_component_info_offset(void)
2509 {
2510
2511 return RF_COMPONENT_INFO_OFFSET;
2512 }
2513
2514 static daddr_t
2515 rf_component_info_size(unsigned secsize)
2516 {
2517 daddr_t info_size;
2518
2519 KASSERT(secsize);
2520 if (secsize > RF_COMPONENT_INFO_SIZE)
2521 info_size = secsize;
2522 else
2523 info_size = RF_COMPONENT_INFO_SIZE;
2524
2525 return info_size;
2526 }
2527
2528 static daddr_t
2529 rf_parity_map_offset(RF_Raid_t *raidPtr)
2530 {
2531 daddr_t map_offset;
2532
2533 KASSERT(raidPtr->bytesPerSector);
2534 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2535 map_offset = raidPtr->bytesPerSector;
2536 else
2537 map_offset = RF_COMPONENT_INFO_SIZE;
2538 map_offset += rf_component_info_offset();
2539
2540 return map_offset;
2541 }
2542
2543 static daddr_t
2544 rf_parity_map_size(RF_Raid_t *raidPtr)
2545 {
2546 daddr_t map_size;
2547
2548 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2549 map_size = raidPtr->bytesPerSector;
2550 else
2551 map_size = RF_PARITY_MAP_SIZE;
2552
2553 return map_size;
2554 }
2555
2556 int
2557 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2558 {
2559 RF_ComponentLabel_t *clabel;
2560
2561 clabel = raidget_component_label(raidPtr, col);
2562 clabel->clean = RF_RAID_CLEAN;
2563 raidflush_component_label(raidPtr, col);
2564 return(0);
2565 }
2566
2567
2568 int
2569 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2570 {
2571 RF_ComponentLabel_t *clabel;
2572
2573 clabel = raidget_component_label(raidPtr, col);
2574 clabel->clean = RF_RAID_DIRTY;
2575 raidflush_component_label(raidPtr, col);
2576 return(0);
2577 }
2578
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Refresh the in-core copy of column 'col's component label by
	 * reading it from the component's device.  Returns the read
	 * routine's error code. */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2588
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return the in-core component label for column 'col'.  Callers
	 * modify it in place and then use raidflush_component_label() to
	 * push it to disk (see raidmarkclean/raidmarkdirty). */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2594
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Stamp column 'col's in-core label with the set's current
	 * modification counter and write it out to the component.
	 * Returns the write routine's error code. */
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* Keep the parity map's mod counter in lock-step with the label. */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2609
2610
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read the component label from its fixed on-disk area into
	 * *clabel; thin wrapper around raidread_component_area(). */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2620
2621 /* ARGSUSED */
2622 static int
2623 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2624 size_t msize, daddr_t offset, daddr_t dsize)
2625 {
2626 struct buf *bp;
2627 const struct bdevsw *bdev;
2628 int error;
2629
2630 /* XXX should probably ensure that we don't try to do this if
2631 someone has changed rf_protected_sectors. */
2632
2633 if (b_vp == NULL) {
2634 /* For whatever reason, this component is not valid.
2635 Don't try to read a component label from it. */
2636 return(EINVAL);
2637 }
2638
2639 /* get a block of the appropriate size... */
2640 bp = geteblk((int)dsize);
2641 bp->b_dev = dev;
2642
2643 /* get our ducks in a row for the read */
2644 bp->b_blkno = offset / DEV_BSIZE;
2645 bp->b_bcount = dsize;
2646 bp->b_flags |= B_READ;
2647 bp->b_resid = dsize;
2648
2649 bdev = bdevsw_lookup(bp->b_dev);
2650 if (bdev == NULL)
2651 return (ENXIO);
2652 (*bdev->d_strategy)(bp);
2653
2654 error = biowait(bp);
2655
2656 if (!error) {
2657 memcpy(data, bp->b_data, msize);
2658 }
2659
2660 brelse(bp, 0);
2661 return(error);
2662 }
2663
2664
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Write *clabel to the component's fixed on-disk label area;
	 * thin wrapper around raidwrite_component_area().  The final 0
	 * requests a synchronous write. */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2674
2675 /* ARGSUSED */
2676 static int
2677 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2678 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2679 {
2680 struct buf *bp;
2681 const struct bdevsw *bdev;
2682 int error;
2683
2684 /* get a block of the appropriate size... */
2685 bp = geteblk((int)dsize);
2686 bp->b_dev = dev;
2687
2688 /* get our ducks in a row for the write */
2689 bp->b_blkno = offset / DEV_BSIZE;
2690 bp->b_bcount = dsize;
2691 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2692 bp->b_resid = dsize;
2693
2694 memset(bp->b_data, 0, dsize);
2695 memcpy(bp->b_data, data, msize);
2696
2697 bdev = bdevsw_lookup(bp->b_dev);
2698 if (bdev == NULL)
2699 return (ENXIO);
2700 (*bdev->d_strategy)(bp);
2701 if (asyncp)
2702 return 0;
2703 error = biowait(bp);
2704 brelse(bp, 0);
2705 if (error) {
2706 #if 1
2707 printf("Failed to write RAID component info!\n");
2708 #endif
2709 }
2710
2711 return(error);
2712 }
2713
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	/* Write the on-disk parity map to every live component of the
	 * set.  Write errors are currently ignored (see XXX below). */
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		/* Synchronous write (asyncp == 0); return value dropped. */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2731
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	/* Read the parity map from each live component and merge the
	 * copies into *map (first copy taken verbatim, later copies
	 * OR-ed in via rf_paritymap_merge()). */
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* NOTE(review): the read's return value is ignored; on
		 * failure 'tmp' is stale (or uninitialized on the first
		 * pass) yet still merged below — verify this is intended. */
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2756
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/* Bump the set's modification counter and mark every usable
	 * component (and in-use spare) dirty on disk.  Called when the
	 * set goes into active use. */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			/* NOTE(review): if no column references this spare,
			 * scol keeps its previous value (-1 on the first
			 * iteration) — confirm that cannot happen here. */
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2816
2817
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/* Rewrite the component labels of all optimal components and
	 * in-use spares with the current mod counter; on a final update
	 * of a clean set, additionally set the clean bit. */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit...  */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2892
2893 void
2894 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2895 {
2896
2897 if (vp != NULL) {
2898 if (auto_configured == 1) {
2899 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2900 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2901 vput(vp);
2902
2903 } else {
2904 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2905 }
2906 }
2907 }
2908
2909
2910 void
2911 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2912 {
2913 int r,c;
2914 struct vnode *vp;
2915 int acd;
2916
2917
2918 /* We take this opportunity to close the vnodes like we should.. */
2919
2920 for (c = 0; c < raidPtr->numCol; c++) {
2921 vp = raidPtr->raid_cinfo[c].ci_vp;
2922 acd = raidPtr->Disks[c].auto_configured;
2923 rf_close_component(raidPtr, vp, acd);
2924 raidPtr->raid_cinfo[c].ci_vp = NULL;
2925 raidPtr->Disks[c].auto_configured = 0;
2926 }
2927
2928 for (r = 0; r < raidPtr->numSpare; r++) {
2929 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2930 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2931 rf_close_component(raidPtr, vp, acd);
2932 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2933 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2934 }
2935 }
2936
2937
void
rf_ReconThread(struct rf_recon_req *req)
{
	/* Kernel-thread body: fail the requested component (optionally
	 * reconstructing to a spare per RF_FDFLAGS_RECON), then exit.
	 * Takes ownership of 'req' and frees it. */
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	/* recon_in_progress brackets the operation for onlookers. */
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2959
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	/* Kernel-thread body: rewrite all parity, mark the set clean on
	 * success, notify any shutdown waiter, then exit. */
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2990
2991
2992 void
2993 rf_CopybackThread(RF_Raid_t *raidPtr)
2994 {
2995 int s;
2996
2997 raidPtr->copyback_in_progress = 1;
2998 s = splbio();
2999 rf_CopybackReconstructedData(raidPtr);
3000 splx(s);
3001 raidPtr->copyback_in_progress = 0;
3002
3003 /* That's all... */
3004 kthread_exit(0); /* does not return */
3005 }
3006
3007
3008 void
3009 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3010 {
3011 int s;
3012 RF_Raid_t *raidPtr;
3013
3014 s = splbio();
3015 raidPtr = req->raidPtr;
3016 raidPtr->recon_in_progress = 1;
3017 rf_ReconstructInPlace(raidPtr, req->col);
3018 RF_Free(req, sizeof(*req));
3019 raidPtr->recon_in_progress = 0;
3020 splx(s);
3021
3022 /* That's all... */
3023 kthread_exit(0); /* does not return */
3024 }
3025
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	/* Try to read a component label from (dev, vp).  If a reasonable
	 * label is found, prepend a new RF_AutoConfig_t (which keeps vp
	 * open) to ac_list; otherwise close vp.  Returns the (possibly
	 * new) list head, or NULL after freeing the whole list on OOM. */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* entry takes ownership of vp */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3083
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	/* Scan every disk-class device in the system for RAIDframe
	 * components — in wedges, in FS_RAID disklabel partitions, or on
	 * the raw partition — and return them as an RF_AutoConfig_t list
	 * (built by rf_get_component()). */
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges (dk) are whole pseudo-devices; everything else
		 * gets its raw partition. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* A wedge either is a RAIDframe component in its
			 * entirety or is skipped. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3265
3266
3267 int
3268 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3269 {
3270
3271 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3272 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3273 ((clabel->clean == RF_RAID_CLEAN) ||
3274 (clabel->clean == RF_RAID_DIRTY)) &&
3275 clabel->row >=0 &&
3276 clabel->column >= 0 &&
3277 clabel->num_rows > 0 &&
3278 clabel->num_columns > 0 &&
3279 clabel->row < clabel->num_rows &&
3280 clabel->column < clabel->num_columns &&
3281 clabel->blockSize > 0 &&
3282 /*
3283 * numBlocksHi may contain garbage, but it is ok since
3284 * the type is unsigned. If it is really garbage,
3285 * rf_fix_old_label_size() will fix it.
3286 */
3287 rf_component_label_numblocks(clabel) > 0) {
3288 /*
3289 * label looks reasonable enough...
3290 * let's make sure it has no old garbage.
3291 */
3292 if (numsecs)
3293 rf_fix_old_label_size(clabel, numsecs);
3294 return(1);
3295 }
3296 return(0);
3297 }
3298
3299
3300 /*
3301 * For reasons yet unknown, some old component labels have garbage in
3302 * the newer numBlocksHi region, and this causes lossage. Since those
3303 * disks will also have numsecs set to less than 32 bits of sectors,
3304 * we can determine when this corruption has occurred, and fix it.
3305 *
3306 * The exact same problem, with the same unknown reason, happens to
3307 * the partitionSizeHi member as well.
3308 */
3309 static void
3310 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3311 {
3312
3313 if (numsecs < ((uint64_t)1 << 32)) {
3314 if (clabel->numBlocksHi) {
3315 printf("WARNING: total sectors < 32 bits, yet "
3316 "numBlocksHi set\n"
3317 "WARNING: resetting numBlocksHi to zero.\n");
3318 clabel->numBlocksHi = 0;
3319 }
3320
3321 if (clabel->partitionSizeHi) {
3322 printf("WARNING: total sectors < 32 bits, yet "
3323 "partitionSizeHi set\n"
3324 "WARNING: resetting partitionSizeHi to zero.\n");
3325 clabel->partitionSizeHi = 0;
3326 }
3327 }
3328 }
3329
3330
#ifdef DEBUG
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Debug helper: dump the interesting fields of a component label
	 * to the console. */
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* root_partition is masked to 2 bits to index rp[] safely. */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3364
3365 RF_ConfigSet_t *
3366 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3367 {
3368 RF_AutoConfig_t *ac;
3369 RF_ConfigSet_t *config_sets;
3370 RF_ConfigSet_t *cset;
3371 RF_AutoConfig_t *ac_next;
3372
3373
3374 config_sets = NULL;
3375
3376 /* Go through the AutoConfig list, and figure out which components
3377 belong to what sets. */
3378 ac = ac_list;
3379 while(ac!=NULL) {
3380 /* we're going to putz with ac->next, so save it here
3381 for use at the end of the loop */
3382 ac_next = ac->next;
3383
3384 if (config_sets == NULL) {
3385 /* will need at least this one... */
3386 config_sets = (RF_ConfigSet_t *)
3387 malloc(sizeof(RF_ConfigSet_t),
3388 M_RAIDFRAME, M_NOWAIT);
3389 if (config_sets == NULL) {
3390 panic("rf_create_auto_sets: No memory!");
3391 }
3392 /* this one is easy :) */
3393 config_sets->ac = ac;
3394 config_sets->next = NULL;
3395 config_sets->rootable = 0;
3396 ac->next = NULL;
3397 } else {
3398 /* which set does this component fit into? */
3399 cset = config_sets;
3400 while(cset!=NULL) {
3401 if (rf_does_it_fit(cset, ac)) {
3402 /* looks like it matches... */
3403 ac->next = cset->ac;
3404 cset->ac = ac;
3405 break;
3406 }
3407 cset = cset->next;
3408 }
3409 if (cset==NULL) {
3410 /* didn't find a match above... new set..*/
3411 cset = (RF_ConfigSet_t *)
3412 malloc(sizeof(RF_ConfigSet_t),
3413 M_RAIDFRAME, M_NOWAIT);
3414 if (cset == NULL) {
3415 panic("rf_create_auto_sets: No memory!");
3416 }
3417 cset->ac = ac;
3418 ac->next = NULL;
3419 cset->next = config_sets;
3420 cset->rootable = 0;
3421 config_sets = cset;
3422 }
3423 }
3424 ac = ac_next;
3425 }
3426
3427
3428 return(config_sets);
3429 }
3430
3431 static int
3432 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3433 {
3434 RF_ComponentLabel_t *clabel1, *clabel2;
3435
3436 /* If this one matches the *first* one in the set, that's good
3437 enough, since the other members of the set would have been
3438 through here too... */
3439 /* note that we are not checking partitionSize here..
3440
3441 Note that we are also not checking the mod_counters here.
3442 If everything else matches except the mod_counter, that's
3443 good enough for this test. We will deal with the mod_counters
3444 a little later in the autoconfiguration process.
3445
3446 (clabel1->mod_counter == clabel2->mod_counter) &&
3447
3448 The reason we don't check for this is that failed disks
3449 will have lower modification counts. If those disks are
3450 not added to the set they used to belong to, then they will
3451 form their own set, which may result in 2 different sets,
3452 for example, competing to be configured at raid0, and
3453 perhaps competing to be the root filesystem set. If the
3454 wrong ones get configured, or both attempt to become /,
3455 weird behaviour and or serious lossage will occur. Thus we
3456 need to bring them into the fold here, and kick them out at
3457 a later point.
3458
3459 */
3460
3461 clabel1 = cset->ac->clabel;
3462 clabel2 = ac->clabel;
3463 if ((clabel1->version == clabel2->version) &&
3464 (clabel1->serial_number == clabel2->serial_number) &&
3465 (clabel1->num_rows == clabel2->num_rows) &&
3466 (clabel1->num_columns == clabel2->num_columns) &&
3467 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3468 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3469 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3470 (clabel1->parityConfig == clabel2->parityConfig) &&
3471 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3472 (clabel1->blockSize == clabel2->blockSize) &&
3473 rf_component_label_numblocks(clabel1) ==
3474 rf_component_label_numblocks(clabel2) &&
3475 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3476 (clabel1->root_partition == clabel2->root_partition) &&
3477 (clabel1->last_unit == clabel2->last_unit) &&
3478 (clabel1->config_order == clabel2->config_order)) {
3479 /* if it get's here, it almost *has* to be a match */
3480 } else {
3481 /* it's not consistent with somebody in the set..
3482 punt */
3483 return(0);
3484 }
3485 /* all was fine.. it must fit... */
3486 return(1);
3487 }
3488
/*
 * Decide whether a config set has enough 'live' components (those
 * whose label carries the set's highest mod_counter) to be
 * configured.  Returns 1 if the set is usable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The set's authoritative mod_counter is the maximum seen on
	   any member; components with a lower count are stale. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component that claims that column
	   and carries the current mod_counter. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd component of a mirror
			   pair without bailing out above, so this pair
			   has at least one live member.. reset the
			   even_pair_failed flag and go on to the next
			   pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Per-level failure tolerance: RAID 0 survives none,
	   RAID 4/5 survive a single missing component. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3591
3592 void
3593 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3594 RF_Raid_t *raidPtr)
3595 {
3596 RF_ComponentLabel_t *clabel;
3597 int i;
3598
3599 clabel = ac->clabel;
3600
3601 /* 1. Fill in the common stuff */
3602 config->numRow = clabel->num_rows = 1;
3603 config->numCol = clabel->num_columns;
3604 config->numSpare = 0; /* XXX should this be set here? */
3605 config->sectPerSU = clabel->sectPerSU;
3606 config->SUsPerPU = clabel->SUsPerPU;
3607 config->SUsPerRU = clabel->SUsPerRU;
3608 config->parityConfig = clabel->parityConfig;
3609 /* XXX... */
3610 strcpy(config->diskQueueType,"fifo");
3611 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3612 config->layoutSpecificSize = 0; /* XXX ?? */
3613
3614 while(ac!=NULL) {
3615 /* row/col values will be in range due to the checks
3616 in reasonable_label() */
3617 strcpy(config->devnames[0][ac->clabel->column],
3618 ac->devname);
3619 ac = ac->next;
3620 }
3621
3622 for(i=0;i<RF_MAXDBGV;i++) {
3623 config->debugVars[i][0] = 0;
3624 }
3625 }
3626
3627 int
3628 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3629 {
3630 RF_ComponentLabel_t *clabel;
3631 int column;
3632 int sparecol;
3633
3634 raidPtr->autoconfigure = new_value;
3635
3636 for(column=0; column<raidPtr->numCol; column++) {
3637 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3638 clabel = raidget_component_label(raidPtr, column);
3639 clabel->autoconfigure = new_value;
3640 raidflush_component_label(raidPtr, column);
3641 }
3642 }
3643 for(column = 0; column < raidPtr->numSpare ; column++) {
3644 sparecol = raidPtr->numCol + column;
3645 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3646 clabel = raidget_component_label(raidPtr, sparecol);
3647 clabel->autoconfigure = new_value;
3648 raidflush_component_label(raidPtr, sparecol);
3649 }
3650 }
3651 return(new_value);
3652 }
3653
3654 int
3655 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3656 {
3657 RF_ComponentLabel_t *clabel;
3658 int column;
3659 int sparecol;
3660
3661 raidPtr->root_partition = new_value;
3662 for(column=0; column<raidPtr->numCol; column++) {
3663 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3664 clabel = raidget_component_label(raidPtr, column);
3665 clabel->root_partition = new_value;
3666 raidflush_component_label(raidPtr, column);
3667 }
3668 }
3669 for(column = 0; column < raidPtr->numSpare ; column++) {
3670 sparecol = raidPtr->numCol + column;
3671 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3672 clabel = raidget_component_label(raidPtr, sparecol);
3673 clabel->root_partition = new_value;
3674 raidflush_component_label(raidPtr, sparecol);
3675 }
3676 }
3677 return(new_value);
3678 }
3679
3680 void
3681 rf_release_all_vps(RF_ConfigSet_t *cset)
3682 {
3683 RF_AutoConfig_t *ac;
3684
3685 ac = cset->ac;
3686 while(ac!=NULL) {
3687 /* Close the vp, and give it back */
3688 if (ac->vp) {
3689 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3690 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3691 vput(ac->vp);
3692 ac->vp = NULL;
3693 }
3694 ac = ac->next;
3695 }
3696 }
3697
3698
3699 void
3700 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3701 {
3702 RF_AutoConfig_t *ac;
3703 RF_AutoConfig_t *next_ac;
3704
3705 ac = cset->ac;
3706 while(ac!=NULL) {
3707 next_ac = ac->next;
3708 /* nuke the label */
3709 free(ac->clabel, M_RAIDFRAME);
3710 /* cleanup the config structure */
3711 free(ac, M_RAIDFRAME);
3712 /* "next.." */
3713 ac = next_ac;
3714 }
3715 /* and, finally, nuke the config set */
3716 free(cset, M_RAIDFRAME);
3717 }
3718
3719
/*
 * Populate a component label from the live state of the RAID set.
 * The caller is responsible for per-component fields (row/column,
 * partitionSize) and for writing the label out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout parameters */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count across numBlocks/numBlocksHi */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3752
/*
 * Autoconfigure a config set: build an RF_Config_t from the set's
 * labels, find a free raid unit (preferring the unit the set was last
 * configured on), and configure it.  Returns the softc on success,
 * NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at last_unit and walk upward until we find a unit that
	   is not already a valid (configured) RAID set. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		/* force parity-status bookkeeping until a clean shutdown */
		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3826
3827 void
3828 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3829 {
3830 struct buf *bp;
3831 struct raid_softc *rs;
3832
3833 bp = (struct buf *)desc->bp;
3834 rs = desc->raidPtr->softc;
3835 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3836 (bp->b_flags & B_READ));
3837 }
3838
/*
 * Initialize a pool at IPL_BIO, pre-allocate xmin items, and set the
 * low/high watermarks so the pool keeps at least xmin and at most
 * xmax items around.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3848
3849 /*
3850 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3851 * if there is IO pending and if that IO could possibly be done for a
3852 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3853 * otherwise.
3854 *
3855 */
3856
3857 int
3858 rf_buf_queue_check(RF_Raid_t *raidPtr)
3859 {
3860 struct raid_softc *rs = raidPtr->softc;
3861 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3862 /* there is work to do */
3863 return 0;
3864 }
3865 /* default is nothing to do */
3866 return 1;
3867 }
3868
3869 int
3870 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3871 {
3872 uint64_t numsecs;
3873 unsigned secsize;
3874 int error;
3875
3876 error = getdisksize(vp, &numsecs, &secsize);
3877 if (error == 0) {
3878 diskPtr->blockSize = secsize;
3879 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3880 diskPtr->partitionSize = numsecs;
3881 return 0;
3882 }
3883 return error;
3884 }
3885
/*
 * Autoconf match routine: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3891
/*
 * Autoconf attach routine: nothing to do here -- real setup happens
 * when the unit is configured (see raidinit()/rf_auto_config_set()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3897
3898
3899 static int
3900 raid_detach(device_t self, int flags)
3901 {
3902 int error;
3903 struct raid_softc *rs = raidget(device_unit(self));
3904
3905 if (rs == NULL)
3906 return ENXIO;
3907
3908 if ((error = raidlock(rs)) != 0)
3909 return (error);
3910
3911 error = raid_detach_unlocked(rs);
3912
3913 raidunlock(rs);
3914
3915 /* XXXkd: raidput(rs) ??? */
3916
3917 return error;
3918 }
3919
/*
 * Publish a (fabricated) disk geometry for the RAID set and register
 * it with the disk subsystem.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	/* start from zero; fields we don't set below stay cleared */
	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* invented value: pretend 4 tracks per component column */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3934
3935 /*
3936 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3937 * We end up returning whatever error was returned by the first cache flush
3938 * that fails.
3939 */
3940
3941 int
3942 rf_sync_component_caches(RF_Raid_t *raidPtr)
3943 {
3944 int c, sparecol;
3945 int e,error;
3946 int force = 1;
3947
3948 error = 0;
3949 for (c = 0; c < raidPtr->numCol; c++) {
3950 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3951 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3952 &force, FWRITE, NOCRED);
3953 if (e) {
3954 if (e != ENODEV)
3955 printf("raid%d: cache flush to component %s failed.\n",
3956 raidPtr->raidid, raidPtr->Disks[c].devname);
3957 if (error == 0) {
3958 error = e;
3959 }
3960 }
3961 }
3962 }
3963
3964 for( c = 0; c < raidPtr->numSpare ; c++) {
3965 sparecol = raidPtr->numCol + c;
3966 /* Need to ensure that the reconstruct actually completed! */
3967 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3968 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3969 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3970 if (e) {
3971 if (e != ENODEV)
3972 printf("raid%d: cache flush to component %s failed.\n",
3973 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3974 if (error == 0) {
3975 error = e;
3976 }
3977 }
3978 }
3979 }
3980 return error;
3981 }
3982