/*	$NetBSD: rf_netbsdkintf.c,v 1.320 2014/12/31 19:52:06 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.320 2014/12/31 19:52:06 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_raid_autoconfig.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef COMPAT_50
#include "rf_compat50.h"
#endif

#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
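
/*
 * Usage note: call sites hand db1_printf() a fully parenthesized printf
 * argument list, e.g. db1_printf(("b_bcount is zero..\n")); -- the macro
 * forwards that list verbatim to printf, and the do/while wrapper keeps
 * the expansion safe inside unbraced if/else bodies.
 */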

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);

void raidattach(int);
static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);


dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = { raidstrategy, minphys };

struct raid_softc {
	device_t sc_dev;
	int     sc_unit;
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;
	LIST_ENTRY(raid_softc) sc_link;
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
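
/*
 * A rough bound implied by the example above (a sketch, not a guarantee
 * from the code): with a full-stripe write size of w bytes (64K above),
 * each outstanding write pins about w of incoming data plus 2w of old
 * data/parity when the parity buffer is re-used (3w without re-use), so
 * peak usage is roughly RAIDOUTSTANDING * 3w -- e.g. 10 * 192K = 1920K,
 * the 640K + 1280K quoted above -- or RAIDOUTSTANDING * 4w without re-use.
 */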

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
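
/*
 * RAIDLABELDEV(dev) rebuilds a dev_t that names the raw partition of the
 * same unit; the disklabel reads and writes below always go through that
 * raw-partition node rather than whatever partition the caller opened.
 */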

/* declared here, and made public, for the benefit of KVM stuff.. */

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
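/*
 * For example, autoconfiguration is normally enabled with a line in the
 * kernel configuration file (which is what generates the
 * opt_raid_autoconfig.h header included above):
 *
 *	options RAID_AUTOCONFIG
 */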
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}

static struct raid_softc *
raidget(int unit) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition.  This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition.  We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used).  For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help.  If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */
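	/*
	 * A worked example with made-up numbers: if this partition begins
	 * at sector 1024 of the RAID set and RF_PROTECTED_SECTORS is 64,
	 * a dump aimed at blkno 100 is written at sector
	 * 1024 + 64 + 100 = 1188 of the underlying component's partition.
	 */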

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... the device is not unconfigured yet.
		   (Device shutdown will have taken care of setting the
		   clean bits if RAIDF_INITED was not set.)
		   Mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}

void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int     wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* schedule the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
/* ARGSUSED */
int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}

int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
		     (rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		 */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread,
		    raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			    raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconstructInPlaceThread,
		    rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
		    (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconThread,
		    rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread,
		    raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
			    raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
			    raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	rf_set_geometry(rs, raidPtr);

	dkwedge_discover(&rs->sc_dkdev);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2006 * XXX
2007 *
2008 * XXX This code is not currently used. GO
2009 */
2010 int
2011 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2012 {
2013 int retcode;
2014
2015 rf_lock_mutex2(rf_sparet_wait_mutex);
2016 req->next = rf_sparet_wait_queue;
2017 rf_sparet_wait_queue = req;
2018 rf_broadcast_cond2(rf_sparet_wait_cv);
2019
	/* rf_wait_cond2() drops the mutex while we sleep */
2021 while (!rf_sparet_resp_queue) {
2022 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2023 }
2024 req = rf_sparet_resp_queue;
2025 rf_sparet_resp_queue = req->next;
2026 rf_unlock_mutex2(rf_sparet_wait_mutex);
2027
2028 retcode = req->fcol;
2029 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2030 * alloc'd */
2031 return (retcode);
2032 }
2033 #endif
2034
/* a wrapper around rf_DoAccess that extracts the appropriate info from
 * the bp and passes it down.
 * Any calls originating in the kernel must use non-blocking I/O.  We
 * also do some extra sanity checking to return "appropriate" error
 * values for certain conditions (to make some standard utilities work).
 *
 * Formerly known as: rf_DoAccessKernel
 */
2043 void
2044 raidstart(RF_Raid_t *raidPtr)
2045 {
2046 RF_SectorCount_t num_blocks, pb, sum;
2047 RF_RaidAddr_t raid_addr;
2048 struct partition *pp;
2049 daddr_t blocknum;
2050 struct raid_softc *rs;
2051 int do_async;
2052 struct buf *bp;
2053 int rc;
2054
2055 rs = raidPtr->softc;
2056 /* quick check to see if anything has died recently */
2057 rf_lock_mutex2(raidPtr->mutex);
2058 if (raidPtr->numNewFailures > 0) {
2059 rf_unlock_mutex2(raidPtr->mutex);
2060 rf_update_component_labels(raidPtr,
2061 RF_NORMAL_COMPONENT_UPDATE);
2062 rf_lock_mutex2(raidPtr->mutex);
2063 raidPtr->numNewFailures--;
2064 }
2065
2066 /* Check to see if we're at the limit... */
2067 while (raidPtr->openings > 0) {
2068 rf_unlock_mutex2(raidPtr->mutex);
2069
2070 /* get the next item, if any, from the queue */
2071 if ((bp = bufq_get(rs->buf_queue)) == NULL) {
2072 /* nothing more to do */
2073 return;
2074 }
2075
2076 /* Ok, for the bp we have here, bp->b_blkno is relative to the
2077 * partition.. Need to make it absolute to the underlying
2078 * device.. */
2079
2080 blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
2081 if (DISKPART(bp->b_dev) != RAW_PART) {
2082 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
2083 blocknum += pp->p_offset;
2084 }
2085
2086 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
2087 (int) blocknum));
2088
2089 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2090 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
2091
2092 /* *THIS* is where we adjust what block we're going to...
2093 * but DO NOT TOUCH bp->b_blkno!!! */
2094 raid_addr = blocknum;
2095
2096 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2097 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2098 sum = raid_addr + num_blocks + pb;
2099 if (1 || rf_debugKernelAccess) {
2100 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2101 (int) raid_addr, (int) sum, (int) num_blocks,
2102 (int) pb, (int) bp->b_resid));
2103 }
2104 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2105 || (sum < num_blocks) || (sum < pb)) {
2106 bp->b_error = ENOSPC;
2107 bp->b_resid = bp->b_bcount;
2108 biodone(bp);
2109 rf_lock_mutex2(raidPtr->mutex);
2110 continue;
2111 }
2112 /*
2113 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2114 */
2115
2116 if (bp->b_bcount & raidPtr->sectorMask) {
2117 bp->b_error = EINVAL;
2118 bp->b_resid = bp->b_bcount;
2119 biodone(bp);
2120 rf_lock_mutex2(raidPtr->mutex);
2121 continue;
2122
2123 }
2124 db1_printf(("Calling DoAccess..\n"));
2125
2126
2127 rf_lock_mutex2(raidPtr->mutex);
2128 raidPtr->openings--;
2129 rf_unlock_mutex2(raidPtr->mutex);
2130
2131 /*
2132 * Everything is async.
2133 */
2134 do_async = 1;
2135
2136 disk_busy(&rs->sc_dkdev);
2137
2138 /* XXX we're still at splbio() here... do we *really*
2139 need to be? */
2140
2141 /* don't ever condition on bp->b_flags & B_WRITE.
2142 * always condition on B_READ instead */
2143
2144 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2145 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2146 do_async, raid_addr, num_blocks,
2147 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2148
2149 if (rc) {
2150 bp->b_error = rc;
2151 bp->b_resid = bp->b_bcount;
2152 biodone(bp);
2153 /* continue loop */
2154 }
2155
2156 rf_lock_mutex2(raidPtr->mutex);
2157 }
2158 rf_unlock_mutex2(raidPtr->mutex);
2159 }
2160
2161
2162
2163
2164 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2165
2166 int
2167 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2168 {
2169 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2170 struct buf *bp;
2171
2172 req->queue = queue;
2173 bp = req->bp;
2174
2175 switch (req->type) {
2176 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2177 /* XXX need to do something extra here.. */
2178 /* I'm leaving this in, as I've never actually seen it used,
2179 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
2181 queue->numOutstanding++;
2182
2183 bp->b_flags = 0;
2184 bp->b_private = req;
2185
2186 KernelWakeupFunc(bp);
2187 break;
2188
2189 case RF_IO_TYPE_READ:
2190 case RF_IO_TYPE_WRITE:
2191 #if RF_ACC_TRACE > 0
2192 if (req->tracerec) {
2193 RF_ETIMER_START(req->tracerec->timer);
2194 }
2195 #endif
2196 InitBP(bp, queue->rf_cinfo->ci_vp,
2197 op, queue->rf_cinfo->ci_dev,
2198 req->sectorOffset, req->numSector,
2199 req->buf, KernelWakeupFunc, (void *) req,
2200 queue->raidPtr->logBytesPerSector, req->b_proc);
2201
2202 if (rf_debugKernelAccess) {
2203 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2204 (long) bp->b_blkno));
2205 }
2206 queue->numOutstanding++;
2207 queue->last_deq_sector = req->sectorOffset;
2208 /* acc wouldn't have been let in if there were any pending
2209 * reqs at any other priority */
2210 queue->curPriority = req->priority;
2211
2212 db1_printf(("Going for %c to unit %d col %d\n",
2213 req->type, queue->raidPtr->raidid,
2214 queue->col));
2215 db1_printf(("sector %d count %d (%d bytes) %d\n",
2216 (int) req->sectorOffset, (int) req->numSector,
2217 (int) (req->numSector <<
2218 queue->raidPtr->logBytesPerSector),
2219 (int) queue->raidPtr->logBytesPerSector));
2220
2221 /*
2222 * XXX: drop lock here since this can block at
2223 * least with backing SCSI devices. Retake it
2224 * to minimize fuss with calling interfaces.
2225 */
2226
2227 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2228 bdev_strategy(bp);
2229 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2230 break;
2231
2232 default:
2233 panic("bad req->type in rf_DispatchKernelIO");
2234 }
2235 db1_printf(("Exiting from DispatchKernelIO\n"));
2236
2237 return (0);
2238 }
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
2242 static void
2243 KernelWakeupFunc(struct buf *bp)
2244 {
2245 RF_DiskQueueData_t *req = NULL;
2246 RF_DiskQueue_t *queue;
2247
2248 db1_printf(("recovering the request queue:\n"));
2249
2250 req = bp->b_private;
2251
2252 queue = (RF_DiskQueue_t *) req->queue;
2253
2254 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2255
2256 #if RF_ACC_TRACE > 0
2257 if (req->tracerec) {
2258 RF_ETIMER_STOP(req->tracerec->timer);
2259 RF_ETIMER_EVAL(req->tracerec->timer);
2260 rf_lock_mutex2(rf_tracing_mutex);
2261 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2262 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2263 req->tracerec->num_phys_ios++;
2264 rf_unlock_mutex2(rf_tracing_mutex);
2265 }
2266 #endif
2267
2268 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2269 * ballistic, and mark the component as hosed... */
2270
2271 if (bp->b_error != 0) {
2272 /* Mark the disk as dead */
2273 /* but only mark it once... */
2274 /* and only if it wouldn't leave this RAID set
2275 completely broken */
2276 if (((queue->raidPtr->Disks[queue->col].status ==
2277 rf_ds_optimal) ||
2278 (queue->raidPtr->Disks[queue->col].status ==
2279 rf_ds_used_spare)) &&
2280 (queue->raidPtr->numFailures <
2281 queue->raidPtr->Layout.map->faultsTolerated)) {
2282 printf("raid%d: IO Error. Marking %s as failed.\n",
2283 queue->raidPtr->raidid,
2284 queue->raidPtr->Disks[queue->col].devname);
2285 queue->raidPtr->Disks[queue->col].status =
2286 rf_ds_failed;
2287 queue->raidPtr->status = rf_rs_degraded;
2288 queue->raidPtr->numFailures++;
2289 queue->raidPtr->numNewFailures++;
2290 } else { /* Disk is already dead... */
2291 /* printf("Disk already marked as dead!\n"); */
2292 }
2293
2294 }
2295
2296 /* Fill in the error value */
2297 req->error = bp->b_error;
2298
2299 /* Drop this one on the "finished" queue... */
2300 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2301
2302 /* Let the raidio thread know there is work to be done. */
2303 rf_signal_cond2(queue->raidPtr->iodone_cv);
2304
2305 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2306 }
2307
2308
2309 /*
2310 * initialize a buf structure for doing an I/O in the kernel.
2311 */
2312 static void
2313 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2314 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2315 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2316 struct proc *b_proc)
2317 {
2318 /* bp->b_flags = B_PHYS | rw_flag; */
2319 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2320 bp->b_oflags = 0;
2321 bp->b_cflags = 0;
2322 bp->b_bcount = numSect << logBytesPerSector;
2323 bp->b_bufsize = bp->b_bcount;
2324 bp->b_error = 0;
2325 bp->b_dev = dev;
2326 bp->b_data = bf;
2327 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2328 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2329 if (bp->b_bcount == 0) {
2330 panic("bp->b_bcount is zero in InitBP!!");
2331 }
2332 bp->b_proc = b_proc;
2333 bp->b_iodone = cbFunc;
2334 bp->b_private = cbArg;
2335 }
2336
2337 static void
2338 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2339 struct disklabel *lp)
2340 {
2341 memset(lp, 0, sizeof(*lp));
2342
2343 /* fabricate a label... */
2344 if (raidPtr->totalSectors > UINT32_MAX)
2345 lp->d_secperunit = UINT32_MAX;
2346 else
2347 lp->d_secperunit = raidPtr->totalSectors;
2348 lp->d_secsize = raidPtr->bytesPerSector;
2349 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2350 lp->d_ntracks = 4 * raidPtr->numCol;
2351 lp->d_ncylinders = raidPtr->totalSectors /
2352 (lp->d_nsectors * lp->d_ntracks);
2353 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2354
2355 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2356 lp->d_type = DTYPE_RAID;
2357 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2358 lp->d_rpm = 3600;
2359 lp->d_interleave = 1;
2360 lp->d_flags = 0;
2361
2362 lp->d_partitions[RAW_PART].p_offset = 0;
2363 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2364 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2365 lp->d_npartitions = RAW_PART + 1;
2366
2367 lp->d_magic = DISKMAGIC;
2368 lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
2370
2371 }
2372 /*
2373 * Read the disklabel from the raid device. If one is not present, fake one
2374 * up.
2375 */
2376 static void
2377 raidgetdisklabel(dev_t dev)
2378 {
2379 int unit = raidunit(dev);
2380 struct raid_softc *rs;
2381 const char *errstring;
2382 struct disklabel *lp;
2383 struct cpu_disklabel *clp;
2384 RF_Raid_t *raidPtr;
2385
2386 if ((rs = raidget(unit)) == NULL)
2387 return;
2388
2389 lp = rs->sc_dkdev.dk_label;
2390 clp = rs->sc_dkdev.dk_cpulabel;
2391
2392 db1_printf(("Getting the disklabel...\n"));
2393
2394 memset(clp, 0, sizeof(*clp));
2395
2396 raidPtr = &rs->sc_r;
2397
2398 raidgetdefaultlabel(raidPtr, rs, lp);
2399
2400 /*
2401 * Call the generic disklabel extraction routine.
2402 */
2403 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2404 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2405 if (errstring)
2406 raidmakedisklabel(rs);
2407 else {
2408 int i;
2409 struct partition *pp;
2410
		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when the interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
2419 if (lp->d_secperunit < UINT32_MAX ?
2420 lp->d_secperunit != rs->sc_size :
2421 lp->d_secperunit > rs->sc_size)
2422 printf("raid%d: WARNING: %s: "
2423 "total sector size in disklabel (%ju) != "
2424 "the size of raid (%ju)\n", unit, rs->sc_xname,
2425 (uintmax_t)lp->d_secperunit,
2426 (uintmax_t)rs->sc_size);
2427 for (i = 0; i < lp->d_npartitions; i++) {
2428 pp = &lp->d_partitions[i];
2429 if (pp->p_offset + pp->p_size > rs->sc_size)
2430 printf("raid%d: WARNING: %s: end of partition `%c' "
2431 "exceeds the size of raid (%ju)\n",
2432 unit, rs->sc_xname, 'a' + i,
2433 (uintmax_t)rs->sc_size);
2434 }
2435 }
2436
2437 }
2438 /*
2439 * Take care of things one might want to take care of in the event
2440 * that a disklabel isn't present.
2441 */
2442 static void
2443 raidmakedisklabel(struct raid_softc *rs)
2444 {
2445 struct disklabel *lp = rs->sc_dkdev.dk_label;
2446 db1_printf(("Making a label..\n"));
2447
2448 /*
2449 * For historical reasons, if there's no disklabel present
2450 * the raw partition must be marked FS_BSDFFS.
2451 */
2452
2453 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2454
2455 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2456
2457 lp->d_checksum = dkcksum(lp);
2458 }
2459 /*
2460 * Wait interruptibly for an exclusive lock.
2461 *
2462 * XXX
2463 * Several drivers do this; it should be abstracted and made MP-safe.
2464 * (Hmm... where have we seen this warning before :-> GO )
2465 */
2466 static int
2467 raidlock(struct raid_softc *rs)
2468 {
2469 int error;
2470
2471 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2472 rs->sc_flags |= RAIDF_WANTED;
2473 if ((error =
2474 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2475 return (error);
2476 }
2477 rs->sc_flags |= RAIDF_LOCKED;
2478 return (0);
2479 }
2480 /*
2481 * Unlock and wake up any waiters.
2482 */
2483 static void
2484 raidunlock(struct raid_softc *rs)
2485 {
2486
2487 rs->sc_flags &= ~RAIDF_LOCKED;
2488 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2489 rs->sc_flags &= ~RAIDF_WANTED;
2490 wakeup(rs);
2491 }
2492 }
2493
2494
2495 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2496 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2497 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2498
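/*
 * Layout of the area reserved at the front of each component, as
 * implied by the helpers below (byte offsets; each region is padded
 * up to at least one sector):
 *
 *	RF_COMPONENT_INFO_OFFSET: component label,
 *	    max(secsize, RF_COMPONENT_INFO_SIZE) bytes
 *	immediately after it: parity map,
 *	    max(secsize, RF_PARITY_MAP_SIZE) bytes
 */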
2499 static daddr_t
2500 rf_component_info_offset(void)
2501 {
2502
2503 return RF_COMPONENT_INFO_OFFSET;
2504 }
2505
2506 static daddr_t
2507 rf_component_info_size(unsigned secsize)
2508 {
2509 daddr_t info_size;
2510
2511 KASSERT(secsize);
2512 if (secsize > RF_COMPONENT_INFO_SIZE)
2513 info_size = secsize;
2514 else
2515 info_size = RF_COMPONENT_INFO_SIZE;
2516
2517 return info_size;
2518 }
2519
2520 static daddr_t
2521 rf_parity_map_offset(RF_Raid_t *raidPtr)
2522 {
2523 daddr_t map_offset;
2524
2525 KASSERT(raidPtr->bytesPerSector);
2526 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2527 map_offset = raidPtr->bytesPerSector;
2528 else
2529 map_offset = RF_COMPONENT_INFO_SIZE;
2530 map_offset += rf_component_info_offset();
2531
2532 return map_offset;
2533 }
2534
2535 static daddr_t
2536 rf_parity_map_size(RF_Raid_t *raidPtr)
2537 {
2538 daddr_t map_size;
2539
2540 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2541 map_size = raidPtr->bytesPerSector;
2542 else
2543 map_size = RF_PARITY_MAP_SIZE;
2544
2545 return map_size;
2546 }
2547
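/*
 * Mark the component in the given column as clean and push the
 * updated label out to the component.
 */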
2548 int
2549 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2550 {
2551 RF_ComponentLabel_t *clabel;
2552
2553 clabel = raidget_component_label(raidPtr, col);
2554 clabel->clean = RF_RAID_CLEAN;
2555 raidflush_component_label(raidPtr, col);
2556 return(0);
2557 }
2558
2559
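/*
 * Mark the component in the given column as dirty and push the
 * updated label out to the component.
 */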
2560 int
2561 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2562 {
2563 RF_ComponentLabel_t *clabel;
2564
2565 clabel = raidget_component_label(raidPtr, col);
2566 clabel->clean = RF_RAID_DIRTY;
2567 raidflush_component_label(raidPtr, col);
2568 return(0);
2569 }
2570
2571 int
2572 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2573 {
2574 KASSERT(raidPtr->bytesPerSector);
2575 return raidread_component_label(raidPtr->bytesPerSector,
2576 raidPtr->Disks[col].dev,
2577 raidPtr->raid_cinfo[col].ci_vp,
2578 &raidPtr->raid_cinfo[col].ci_label);
2579 }
2580
2581 RF_ComponentLabel_t *
2582 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2583 {
2584 return &raidPtr->raid_cinfo[col].ci_label;
2585 }
2586
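/*
 * Bring the in-core label up to date with the set's modification
 * counter and write it back to the component.
 */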
2587 int
2588 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2589 {
2590 RF_ComponentLabel_t *label;
2591
2592 label = &raidPtr->raid_cinfo[col].ci_label;
2593 label->mod_counter = raidPtr->mod_counter;
2594 #ifndef RF_NO_PARITY_MAP
2595 label->parity_map_modcount = label->mod_counter;
2596 #endif
2597 return raidwrite_component_label(raidPtr->bytesPerSector,
2598 raidPtr->Disks[col].dev,
2599 raidPtr->raid_cinfo[col].ci_vp, label);
2600 }
2601
2602
2603 static int
2604 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2605 RF_ComponentLabel_t *clabel)
2606 {
2607 return raidread_component_area(dev, b_vp, clabel,
2608 sizeof(RF_ComponentLabel_t),
2609 rf_component_info_offset(),
2610 rf_component_info_size(secsize));
2611 }
2612
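/*
 * Synchronously read dsize bytes at the given byte offset of a
 * component into a scratch buffer and copy the first msize bytes
 * into *data.
 */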
2613 /* ARGSUSED */
2614 static int
2615 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2616 size_t msize, daddr_t offset, daddr_t dsize)
2617 {
2618 struct buf *bp;
2619 const struct bdevsw *bdev;
2620 int error;
2621
2622 /* XXX should probably ensure that we don't try to do this if
2623 someone has changed rf_protected_sectors. */
2624
2625 if (b_vp == NULL) {
2626 /* For whatever reason, this component is not valid.
2627 Don't try to read a component label from it. */
2628 return(EINVAL);
2629 }
2630
2631 /* get a block of the appropriate size... */
2632 bp = geteblk((int)dsize);
2633 bp->b_dev = dev;
2634
2635 /* get our ducks in a row for the read */
2636 bp->b_blkno = offset / DEV_BSIZE;
2637 bp->b_bcount = dsize;
2638 bp->b_flags |= B_READ;
2639 bp->b_resid = dsize;
2640
	bdev = bdevsw_lookup(bp->b_dev);
	if (bdev == NULL) {
		brelse(bp, 0);	/* don't leak the buffer from geteblk() */
		return (ENXIO);
	}
2644 (*bdev->d_strategy)(bp);
2645
2646 error = biowait(bp);
2647
2648 if (!error) {
2649 memcpy(data, bp->b_data, msize);
2650 }
2651
2652 brelse(bp, 0);
2653 return(error);
2654 }
2655
2656
2657 static int
2658 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2659 RF_ComponentLabel_t *clabel)
2660 {
2661 return raidwrite_component_area(dev, b_vp, clabel,
2662 sizeof(RF_ComponentLabel_t),
2663 rf_component_info_offset(),
2664 rf_component_info_size(secsize), 0);
2665 }
2666
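/*
 * Write msize bytes of *data (zero-padded to dsize) at the given byte
 * offset of a component; if asyncp is set, the write completes
 * asynchronously and the buffer is released on completion.
 */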
2667 /* ARGSUSED */
2668 static int
2669 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2670 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2671 {
2672 struct buf *bp;
2673 const struct bdevsw *bdev;
2674 int error;
2675
2676 /* get a block of the appropriate size... */
2677 bp = geteblk((int)dsize);
2678 bp->b_dev = dev;
2679
2680 /* get our ducks in a row for the write */
2681 bp->b_blkno = offset / DEV_BSIZE;
2682 bp->b_bcount = dsize;
2683 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2684 bp->b_resid = dsize;
2685
2686 memset(bp->b_data, 0, dsize);
2687 memcpy(bp->b_data, data, msize);
2688
	bdev = bdevsw_lookup(bp->b_dev);
	if (bdev == NULL) {
		brelse(bp, 0);	/* don't leak the buffer from geteblk() */
		return (ENXIO);
	}
2692 (*bdev->d_strategy)(bp);
2693 if (asyncp)
2694 return 0;
2695 error = biowait(bp);
2696 brelse(bp, 0);
2697 if (error) {
2698 #if 1
2699 printf("Failed to write RAID component info!\n");
2700 #endif
2701 }
2702
2703 return(error);
2704 }
2705
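/*
 * Write the in-core parity map to the parity map area of every
 * component that is not dead.
 */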
2706 void
2707 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2708 {
2709 int c;
2710
2711 for (c = 0; c < raidPtr->numCol; c++) {
2712 /* Skip dead disks. */
2713 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2714 continue;
2715 /* XXXjld: what if an error occurs here? */
2716 raidwrite_component_area(raidPtr->Disks[c].dev,
2717 raidPtr->raid_cinfo[c].ci_vp, map,
2718 RF_PARITYMAP_NBYTE,
2719 rf_parity_map_offset(raidPtr),
2720 rf_parity_map_size(raidPtr), 0);
2721 }
2722 }
2723
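/*
 * Read the parity map from each live component, merging the per-disk
 * copies into *map via rf_paritymap_merge().
 */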
2724 void
2725 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2726 {
2727 struct rf_paritymap_ondisk tmp;
2728 int c,first;
2729
2730 first=1;
2731 for (c = 0; c < raidPtr->numCol; c++) {
2732 /* Skip dead disks. */
2733 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2734 continue;
2735 raidread_component_area(raidPtr->Disks[c].dev,
2736 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2737 RF_PARITYMAP_NBYTE,
2738 rf_parity_map_offset(raidPtr),
2739 rf_parity_map_size(raidPtr));
2740 if (first) {
2741 memcpy(map, &tmp, sizeof(*map));
2742 first = 0;
2743 } else {
2744 rf_paritymap_merge(map, &tmp);
2745 }
2746 }
2747 }
2748
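/*
 * Bump the modification counter and mark every usable component
 * (including any in-use spares) as dirty, e.g. ahead of allowing
 * writes to the set.
 */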
2749 void
2750 rf_markalldirty(RF_Raid_t *raidPtr)
2751 {
2752 RF_ComponentLabel_t *clabel;
2753 int sparecol;
2754 int c;
2755 int j;
2756 int scol = -1;
2757
2758 raidPtr->mod_counter++;
2759 for (c = 0; c < raidPtr->numCol; c++) {
2760 /* we don't want to touch (at all) a disk that has
2761 failed */
2762 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2763 clabel = raidget_component_label(raidPtr, c);
2764 if (clabel->status == rf_ds_spared) {
2765 /* XXX do something special...
2766 but whatever you do, don't
2767 try to access it!! */
2768 } else {
2769 raidmarkdirty(raidPtr, c);
2770 }
2771 }
2772 }
2773
2774 for( c = 0; c < raidPtr->numSpare ; c++) {
2775 sparecol = raidPtr->numCol + c;
2776 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2777 /*
2778
2779 we claim this disk is "optimal" if it's
2780 rf_ds_used_spare, as that means it should be
2781 directly substitutable for the disk it replaced.
2782 We note that too...
2783
2784 */
2785
2786 for(j=0;j<raidPtr->numCol;j++) {
2787 if (raidPtr->Disks[j].spareCol == sparecol) {
2788 scol = j;
2789 break;
2790 }
2791 }
2792
2793 clabel = raidget_component_label(raidPtr, sparecol);
2794 /* make sure status is noted */
2795
2796 raid_init_component_label(raidPtr, clabel);
2797
2798 clabel->row = 0;
2799 clabel->column = scol;
2800 /* Note: we *don't* change status from rf_ds_used_spare
2801 to rf_ds_optimal */
2802 /* clabel.status = rf_ds_optimal; */
2803
2804 raidmarkdirty(raidPtr, sparecol);
2805 }
2806 }
2807 }
2808
2809
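/*
 * Push current status out to the component labels of all optimal
 * components and in-use spares.  On a final update of a set whose
 * parity is known good, the components are also marked clean.
 */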
2810 void
2811 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2812 {
2813 RF_ComponentLabel_t *clabel;
2814 int sparecol;
2815 int c;
2816 int j;
2817 int scol;
2818
2819 scol = -1;
2820
2821 /* XXX should do extra checks to make sure things really are clean,
2822 rather than blindly setting the clean bit... */
2823
2824 raidPtr->mod_counter++;
2825
2826 for (c = 0; c < raidPtr->numCol; c++) {
2827 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2828 clabel = raidget_component_label(raidPtr, c);
2829 /* make sure status is noted */
2830 clabel->status = rf_ds_optimal;
2831
2832 /* note what unit we are configured as */
2833 clabel->last_unit = raidPtr->raidid;
2834
2835 raidflush_component_label(raidPtr, c);
2836 if (final == RF_FINAL_COMPONENT_UPDATE) {
2837 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2838 raidmarkclean(raidPtr, c);
2839 }
2840 }
2841 }
2842 /* else we don't touch it.. */
2843 }
2844
2845 for( c = 0; c < raidPtr->numSpare ; c++) {
2846 sparecol = raidPtr->numCol + c;
2847 /* Need to ensure that the reconstruct actually completed! */
2848 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2849 /*
2850
2851 we claim this disk is "optimal" if it's
2852 rf_ds_used_spare, as that means it should be
2853 directly substitutable for the disk it replaced.
2854 We note that too...
2855
2856 */
2857
2858 for(j=0;j<raidPtr->numCol;j++) {
2859 if (raidPtr->Disks[j].spareCol == sparecol) {
2860 scol = j;
2861 break;
2862 }
2863 }
2864
2865 /* XXX shouldn't *really* need this... */
2866 clabel = raidget_component_label(raidPtr, sparecol);
2867 /* make sure status is noted */
2868
2869 raid_init_component_label(raidPtr, clabel);
2870
2871 clabel->column = scol;
2872 clabel->status = rf_ds_optimal;
2873 clabel->last_unit = raidPtr->raidid;
2874
2875 raidflush_component_label(raidPtr, sparecol);
2876 if (final == RF_FINAL_COMPONENT_UPDATE) {
2877 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2878 raidmarkclean(raidPtr, sparecol);
2879 }
2880 }
2881 }
2882 }
2883 }
2884
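/*
 * Close a component's vnode, releasing it the same way it was
 * acquired: autoconfigured components were opened by the kernel,
 * the rest by the configuring process.
 */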
2885 void
2886 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2887 {
2888
2889 if (vp != NULL) {
2890 if (auto_configured == 1) {
2891 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2892 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2893 vput(vp);
2894
2895 } else {
2896 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2897 }
2898 }
2899 }
2900
2901
2902 void
2903 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2904 {
2905 int r,c;
2906 struct vnode *vp;
2907 int acd;
2908
2909
2910 /* We take this opportunity to close the vnodes like we should.. */
2911
2912 for (c = 0; c < raidPtr->numCol; c++) {
2913 vp = raidPtr->raid_cinfo[c].ci_vp;
2914 acd = raidPtr->Disks[c].auto_configured;
2915 rf_close_component(raidPtr, vp, acd);
2916 raidPtr->raid_cinfo[c].ci_vp = NULL;
2917 raidPtr->Disks[c].auto_configured = 0;
2918 }
2919
2920 for (r = 0; r < raidPtr->numSpare; r++) {
2921 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2922 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2923 rf_close_component(raidPtr, vp, acd);
2924 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2925 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2926 }
2927 }
2928
2929
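/*
 * Kernel thread: mark the requested component as failed, optionally
 * kicking off reconstruction to a spare (RF_FDFLAGS_RECON), then exit.
 */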
2930 void
2931 rf_ReconThread(struct rf_recon_req *req)
2932 {
2933 int s;
2934 RF_Raid_t *raidPtr;
2935
2936 s = splbio();
2937 raidPtr = (RF_Raid_t *) req->raidPtr;
2938 raidPtr->recon_in_progress = 1;
2939
2940 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2941 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2942
2943 RF_Free(req, sizeof(*req));
2944
2945 raidPtr->recon_in_progress = 0;
2946 splx(s);
2947
2948 /* That's all... */
2949 kthread_exit(0); /* does not return */
2950 }
2951
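/*
 * Kernel thread: rewrite all parity for the set, mark the parity good
 * on success, and wake anyone waiting on the rewrite during shutdown.
 */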
2952 void
2953 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2954 {
2955 int retcode;
2956 int s;
2957
2958 raidPtr->parity_rewrite_stripes_done = 0;
2959 raidPtr->parity_rewrite_in_progress = 1;
2960 s = splbio();
2961 retcode = rf_RewriteParity(raidPtr);
2962 splx(s);
2963 if (retcode) {
2964 printf("raid%d: Error re-writing parity (%d)!\n",
2965 raidPtr->raidid, retcode);
2966 } else {
2967 /* set the clean bit! If we shutdown correctly,
2968 the clean bit on each component label will get
2969 set */
2970 raidPtr->parity_good = RF_RAID_CLEAN;
2971 }
2972 raidPtr->parity_rewrite_in_progress = 0;
2973
2974 /* Anyone waiting for us to stop? If so, inform them... */
2975 if (raidPtr->waitShutdown) {
2976 wakeup(&raidPtr->parity_rewrite_in_progress);
2977 }
2978
2979 /* That's all... */
2980 kthread_exit(0); /* does not return */
2981 }
2982
2983
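/*
 * Kernel thread: copy reconstructed data from a spare back onto the
 * replaced component, then exit.
 */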
2984 void
2985 rf_CopybackThread(RF_Raid_t *raidPtr)
2986 {
2987 int s;
2988
2989 raidPtr->copyback_in_progress = 1;
2990 s = splbio();
2991 rf_CopybackReconstructedData(raidPtr);
2992 splx(s);
2993 raidPtr->copyback_in_progress = 0;
2994
2995 /* That's all... */
2996 kthread_exit(0); /* does not return */
2997 }
2998
2999
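/*
 * Kernel thread: reconstruct the contents of the given column in
 * place, onto the (presumably replaced) component itself.
 */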
3000 void
3001 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3002 {
3003 int s;
3004 RF_Raid_t *raidPtr;
3005
3006 s = splbio();
3007 raidPtr = req->raidPtr;
3008 raidPtr->recon_in_progress = 1;
3009 rf_ReconstructInPlace(raidPtr, req->col);
3010 RF_Free(req, sizeof(*req));
3011 raidPtr->recon_in_progress = 0;
3012 splx(s);
3013
3014 /* That's all... */
3015 kthread_exit(0); /* does not return */
3016 }
3017
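/*
 * Try to read a component label from the given device; if the label
 * looks reasonable, prepend a new RF_AutoConfig_t for it to ac_list,
 * otherwise close the vnode and ignore the device.
 */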
3018 static RF_AutoConfig_t *
3019 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
3020 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
3021 unsigned secsize)
3022 {
3023 int good_one = 0;
3024 RF_ComponentLabel_t *clabel;
3025 RF_AutoConfig_t *ac;
3026
3027 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
3028 if (clabel == NULL) {
3029 oomem:
3030 while(ac_list) {
3031 ac = ac_list;
3032 if (ac->clabel)
3033 free(ac->clabel, M_RAIDFRAME);
3034 ac_list = ac_list->next;
3035 free(ac, M_RAIDFRAME);
3036 }
3037 printf("RAID auto config: out of memory!\n");
3038 return NULL; /* XXX probably should panic? */
3039 }
3040
3041 if (!raidread_component_label(secsize, dev, vp, clabel)) {
3042 /* Got the label. Does it look reasonable? */
3043 if (rf_reasonable_label(clabel, numsecs) &&
3044 (rf_component_label_partitionsize(clabel) <= size)) {
3045 #ifdef DEBUG
3046 printf("Component on: %s: %llu\n",
3047 cname, (unsigned long long)size);
3048 rf_print_component_label(clabel);
3049 #endif
3050 /* if it's reasonable, add it, else ignore it. */
3051 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
3052 M_NOWAIT);
3053 if (ac == NULL) {
3054 free(clabel, M_RAIDFRAME);
3055 goto oomem;
3056 }
3057 strlcpy(ac->devname, cname, sizeof(ac->devname));
3058 ac->dev = dev;
3059 ac->vp = vp;
3060 ac->clabel = clabel;
3061 ac->next = ac_list;
3062 ac_list = ac;
3063 good_one = 1;
3064 }
3065 }
3066 if (!good_one) {
3067 /* cleanup */
3068 free(clabel, M_RAIDFRAME);
3069 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3070 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3071 vput(vp);
3072 }
3073 return ac_list;
3074 }
3075
3076 RF_AutoConfig_t *
3077 rf_find_raid_components(void)
3078 {
3079 struct vnode *vp;
3080 struct disklabel label;
3081 device_t dv;
3082 deviter_t di;
3083 dev_t dev;
3084 int bmajor, bminor, wedge, rf_part_found;
3085 int error;
3086 int i;
3087 RF_AutoConfig_t *ac_list;
3088 uint64_t numsecs;
3089 unsigned secsize;
3090
3091 /* initialize the AutoConfig list */
3092 ac_list = NULL;
3093
3094 /* we begin by trolling through *all* the devices on the system */
3095
3096 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3097 dv = deviter_next(&di)) {
3098
3099 /* we are only interested in disks... */
3100 if (device_class(dv) != DV_DISK)
3101 continue;
3102
3103 /* we don't care about floppies... */
3104 if (device_is_a(dv, "fd")) {
3105 continue;
3106 }
3107
3108 /* we don't care about CD's... */
3109 if (device_is_a(dv, "cd")) {
3110 continue;
3111 }
3112
3113 /* we don't care about md's... */
3114 if (device_is_a(dv, "md")) {
3115 continue;
3116 }
3117
3118 /* hdfd is the Atari/Hades floppy driver */
3119 if (device_is_a(dv, "hdfd")) {
3120 continue;
3121 }
3122
3123 /* fdisa is the Atari/Milan floppy driver */
3124 if (device_is_a(dv, "fdisa")) {
3125 continue;
3126 }
3127
3128 /* need to find the device_name_to_block_device_major stuff */
3129 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3130
		rf_part_found = 0; /* no raid partition as yet */
3132
3133 /* get a vnode for the raw partition of this disk */
3134
3135 wedge = device_is_a(dv, "dk");
3136 bminor = minor(device_unit(dv));
3137 dev = wedge ? makedev(bmajor, bminor) :
3138 MAKEDISKDEV(bmajor, bminor, RAW_PART);
3139 if (bdevvp(dev, &vp))
3140 panic("RAID can't alloc vnode");
3141
3142 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
3143
3144 if (error) {
3145 /* "Who cares." Continue looking
3146 for something that exists*/
3147 vput(vp);
3148 continue;
3149 }
3150
3151 error = getdisksize(vp, &numsecs, &secsize);
3152 if (error) {
3153 vput(vp);
3154 continue;
3155 }
3156 if (wedge) {
3157 struct dkwedge_info dkw;
3158 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3159 NOCRED);
3160 if (error) {
3161 printf("RAIDframe: can't get wedge info for "
3162 "dev %s (%d)\n", device_xname(dv), error);
3163 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3164 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3165 vput(vp);
3166 continue;
3167 }
3168
3169 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3170 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3171 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3172 vput(vp);
3173 continue;
3174 }
3175
3176 ac_list = rf_get_component(ac_list, dev, vp,
3177 device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /* there is a raid component on this disk */
3179 continue;
3180 }
3181
3182 /* Ok, the disk exists. Go get the disklabel. */
3183 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3184 if (error) {
3185 /*
3186 * XXX can't happen - open() would
3187 * have errored out (or faked up one)
3188 */
3189 if (error != ENOTTY)
3190 printf("RAIDframe: can't get label for dev "
3191 "%s (%d)\n", device_xname(dv), error);
3192 }
3193
3194 /* don't need this any more. We'll allocate it again
3195 a little later if we really do... */
3196 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3197 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3198 vput(vp);
3199
3200 if (error)
3201 continue;
3202
		rf_part_found = 0; /* no raid partitions yet */
3204 for (i = 0; i < label.d_npartitions; i++) {
3205 char cname[sizeof(ac_list->devname)];
3206
3207 /* We only support partitions marked as RAID */
3208 if (label.d_partitions[i].p_fstype != FS_RAID)
3209 continue;
3210
3211 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3212 if (bdevvp(dev, &vp))
3213 panic("RAID can't alloc vnode");
3214
3215 error = VOP_OPEN(vp, FREAD, NOCRED);
3216 if (error) {
3217 /* Whatever... */
3218 vput(vp);
3219 continue;
3220 }
3221 snprintf(cname, sizeof(cname), "%s%c",
3222 device_xname(dv), 'a' + i);
3223 ac_list = rf_get_component(ac_list, dev, vp, cname,
3224 label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /* at least one raid partition on this disk */
3226 }
3227
		/*
		 * If there is no raid component on this disk, either in a
		 * disklabel or inside a wedge, check the raw partition as
		 * well, as it is possible to configure raid components on
		 * raw disk devices.
		 */
3234
3235 if (!rf_part_found) {
3236 char cname[sizeof(ac_list->devname)];
3237
3238 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3239 if (bdevvp(dev, &vp))
3240 panic("RAID can't alloc vnode");
3241
3242 error = VOP_OPEN(vp, FREAD, NOCRED);
3243 if (error) {
3244 /* Whatever... */
3245 vput(vp);
3246 continue;
3247 }
3248 snprintf(cname, sizeof(cname), "%s%c",
3249 device_xname(dv), 'a' + RAW_PART);
3250 ac_list = rf_get_component(ac_list, dev, vp, cname,
3251 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3252 }
3253 }
3254 deviter_release(&di);
3255 return ac_list;
3256 }
3257
3258
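/*
 * Decide whether a component label is plausible: version, clean flag,
 * row/column, geometry and size fields must all be self-consistent
 * before we trust it for autoconfiguration.
 */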
3259 int
3260 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3261 {
3262
3263 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3264 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3265 ((clabel->clean == RF_RAID_CLEAN) ||
3266 (clabel->clean == RF_RAID_DIRTY)) &&
3267 clabel->row >=0 &&
3268 clabel->column >= 0 &&
3269 clabel->num_rows > 0 &&
3270 clabel->num_columns > 0 &&
3271 clabel->row < clabel->num_rows &&
3272 clabel->column < clabel->num_columns &&
3273 clabel->blockSize > 0 &&
3274 /*
3275 * numBlocksHi may contain garbage, but it is ok since
3276 * the type is unsigned. If it is really garbage,
3277 * rf_fix_old_label_size() will fix it.
3278 */
3279 rf_component_label_numblocks(clabel) > 0) {
3280 /*
3281 * label looks reasonable enough...
3282 * let's make sure it has no old garbage.
3283 */
3284 if (numsecs)
3285 rf_fix_old_label_size(clabel, numsecs);
3286 return(1);
3287 }
3288 return(0);
3289 }
3290
3291
3292 /*
3293 * For reasons yet unknown, some old component labels have garbage in
3294 * the newer numBlocksHi region, and this causes lossage. Since those
3295 * disks will also have numsecs set to less than 32 bits of sectors,
3296 * we can determine when this corruption has occurred, and fix it.
3297 *
3298 * The exact same problem, with the same unknown reason, happens to
3299 * the partitionSizeHi member as well.
3300 */
3301 static void
3302 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3303 {
3304
3305 if (numsecs < ((uint64_t)1 << 32)) {
3306 if (clabel->numBlocksHi) {
3307 printf("WARNING: total sectors < 32 bits, yet "
3308 "numBlocksHi set\n"
3309 "WARNING: resetting numBlocksHi to zero.\n");
3310 clabel->numBlocksHi = 0;
3311 }
3312
3313 if (clabel->partitionSizeHi) {
3314 printf("WARNING: total sectors < 32 bits, yet "
3315 "partitionSizeHi set\n"
3316 "WARNING: resetting partitionSizeHi to zero.\n");
3317 clabel->partitionSizeHi = 0;
3318 }
3319 }
3320 }
3321
3322
3323 #ifdef DEBUG
3324 void
3325 rf_print_component_label(RF_ComponentLabel_t *clabel)
3326 {
3327 uint64_t numBlocks;
3328 static const char *rp[] = {
3329 "No", "Force", "Soft", "*invalid*"
3330 };
3331
3332
3333 numBlocks = rf_component_label_numblocks(clabel);
3334
3335 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3336 clabel->row, clabel->column,
3337 clabel->num_rows, clabel->num_columns);
3338 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3339 clabel->version, clabel->serial_number,
3340 clabel->mod_counter);
3341 printf(" Clean: %s Status: %d\n",
3342 clabel->clean ? "Yes" : "No", clabel->status);
3343 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3344 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3345 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3346 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3347 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3348 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3349 printf(" Last configured as: raid%d\n", clabel->last_unit);
3350 #if 0
3351 printf(" Config order: %d\n", clabel->config_order);
3352 #endif
3353
3354 }
3355 #endif
3356
3357 RF_ConfigSet_t *
3358 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3359 {
3360 RF_AutoConfig_t *ac;
3361 RF_ConfigSet_t *config_sets;
3362 RF_ConfigSet_t *cset;
3363 RF_AutoConfig_t *ac_next;
3364
3365
3366 config_sets = NULL;
3367
3368 /* Go through the AutoConfig list, and figure out which components
3369 belong to what sets. */
3370 ac = ac_list;
3371 while(ac!=NULL) {
3372 /* we're going to putz with ac->next, so save it here
3373 for use at the end of the loop */
3374 ac_next = ac->next;
3375
3376 if (config_sets == NULL) {
3377 /* will need at least this one... */
3378 config_sets = (RF_ConfigSet_t *)
3379 malloc(sizeof(RF_ConfigSet_t),
3380 M_RAIDFRAME, M_NOWAIT);
3381 if (config_sets == NULL) {
3382 panic("rf_create_auto_sets: No memory!");
3383 }
3384 /* this one is easy :) */
3385 config_sets->ac = ac;
3386 config_sets->next = NULL;
3387 config_sets->rootable = 0;
3388 ac->next = NULL;
3389 } else {
3390 /* which set does this component fit into? */
3391 cset = config_sets;
3392 while(cset!=NULL) {
3393 if (rf_does_it_fit(cset, ac)) {
3394 /* looks like it matches... */
3395 ac->next = cset->ac;
3396 cset->ac = ac;
3397 break;
3398 }
3399 cset = cset->next;
3400 }
3401 if (cset==NULL) {
3402 /* didn't find a match above... new set..*/
3403 cset = (RF_ConfigSet_t *)
3404 malloc(sizeof(RF_ConfigSet_t),
3405 M_RAIDFRAME, M_NOWAIT);
3406 if (cset == NULL) {
3407 panic("rf_create_auto_sets: No memory!");
3408 }
3409 cset->ac = ac;
3410 ac->next = NULL;
3411 cset->next = config_sets;
3412 cset->rootable = 0;
3413 config_sets = cset;
3414 }
3415 }
3416 ac = ac_next;
3417 }
3418
3419
3420 return(config_sets);
3421 }
3422
3423 static int
3424 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3425 {
3426 RF_ComponentLabel_t *clabel1, *clabel2;
3427
3428 /* If this one matches the *first* one in the set, that's good
3429 enough, since the other members of the set would have been
3430 through here too... */
3431 /* note that we are not checking partitionSize here..
3432
3433 Note that we are also not checking the mod_counters here.
3434 If everything else matches except the mod_counter, that's
3435 good enough for this test. We will deal with the mod_counters
3436 a little later in the autoconfiguration process.
3437
3438 (clabel1->mod_counter == clabel2->mod_counter) &&
3439
3440 The reason we don't check for this is that failed disks
3441 will have lower modification counts. If those disks are
3442 not added to the set they used to belong to, then they will
3443 form their own set, which may result in 2 different sets,
3444 for example, competing to be configured at raid0, and
3445 perhaps competing to be the root filesystem set. If the
3446 wrong ones get configured, or both attempt to become /,
3447 weird behaviour and or serious lossage will occur. Thus we
3448 need to bring them into the fold here, and kick them out at
3449 a later point.
3450
3451 */
3452
3453 clabel1 = cset->ac->clabel;
3454 clabel2 = ac->clabel;
3455 if ((clabel1->version == clabel2->version) &&
3456 (clabel1->serial_number == clabel2->serial_number) &&
3457 (clabel1->num_rows == clabel2->num_rows) &&
3458 (clabel1->num_columns == clabel2->num_columns) &&
3459 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3460 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3461 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3462 (clabel1->parityConfig == clabel2->parityConfig) &&
3463 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3464 (clabel1->blockSize == clabel2->blockSize) &&
3465 rf_component_label_numblocks(clabel1) ==
3466 rf_component_label_numblocks(clabel2) &&
3467 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3468 (clabel1->root_partition == clabel2->root_partition) &&
3469 (clabel1->last_unit == clabel2->last_unit) &&
3470 (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
3472 } else {
3473 /* it's not consistent with somebody in the set..
3474 punt */
3475 return(0);
3476 }
3477 /* all was fine.. it must fit... */
3478 return(1);
3479 }
3480
3481 int
3482 rf_have_enough_components(RF_ConfigSet_t *cset)
3483 {
3484 RF_AutoConfig_t *ac;
3485 RF_AutoConfig_t *auto_config;
3486 RF_ComponentLabel_t *clabel;
3487 int c;
3488 int num_cols;
3489 int num_missing;
3490 int mod_counter;
3491 int mod_counter_found;
3492 int even_pair_failed;
3493 char parity_type;
3494
3495
3496 /* check to see that we have enough 'live' components
3497 of this set. If so, we can configure it if necessary */
3498
3499 num_cols = cset->ac->clabel->num_columns;
3500 parity_type = cset->ac->clabel->parityConfig;
3501
3502 /* XXX Check for duplicate components!?!?!? */
3503
3504 /* Determine what the mod_counter is supposed to be for this set. */
3505
3506 mod_counter_found = 0;
3507 mod_counter = 0;
3508 ac = cset->ac;
3509 while(ac!=NULL) {
3510 if (mod_counter_found==0) {
3511 mod_counter = ac->clabel->mod_counter;
3512 mod_counter_found = 1;
3513 } else {
3514 if (ac->clabel->mod_counter > mod_counter) {
3515 mod_counter = ac->clabel->mod_counter;
3516 }
3517 }
3518 ac = ac->next;
3519 }
3520
3521 num_missing = 0;
3522 auto_config = cset->ac;
3523
3524 even_pair_failed = 0;
3525 for(c=0; c<num_cols; c++) {
3526 ac = auto_config;
3527 while(ac!=NULL) {
3528 if ((ac->clabel->column == c) &&
3529 (ac->clabel->mod_counter == mod_counter)) {
3530 /* it's this one... */
3531 #ifdef DEBUG
3532 printf("Found: %s at %d\n",
3533 ac->devname,c);
3534 #endif
3535 break;
3536 }
3537 ac=ac->next;
3538 }
3539 if (ac==NULL) {
3540 /* Didn't find one here! */
3541 /* special case for RAID 1, especially
3542 where there are more than 2
3543 components (where RAIDframe treats
3544 things a little differently :( ) */
3545 if (parity_type == '1') {
3546 if (c%2 == 0) { /* even component */
3547 even_pair_failed = 1;
3548 } else { /* odd component. If
3549 we're failed, and
3550 so is the even
3551 component, it's
3552 "Good Night, Charlie" */
3553 if (even_pair_failed == 1) {
3554 return(0);
3555 }
3556 }
3557 } else {
3558 /* normal accounting */
3559 num_missing++;
3560 }
3561 }
3562 if ((parity_type == '1') && (c%2 == 1)) {
3563 /* Just did an even component, and we didn't
3564 bail.. reset the even_pair_failed flag,
3565 and go on to the next component.... */
3566 even_pair_failed = 0;
3567 }
3568 }
3569
3570 clabel = cset->ac->clabel;
3571
3572 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3573 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3574 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3575 /* XXX this needs to be made *much* more general */
3576 /* Too many failures */
3577 return(0);
3578 }
3579 /* otherwise, all is well, and we've got enough to take a kick
3580 at autoconfiguring this set */
3581 return(1);
3582 }
3583
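/*
 * Build an RF_Config_t for an auto-detected set from the contents of
 * its component labels.
 */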
3584 void
3585 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3586 RF_Raid_t *raidPtr)
3587 {
3588 RF_ComponentLabel_t *clabel;
3589 int i;
3590
3591 clabel = ac->clabel;
3592
3593 /* 1. Fill in the common stuff */
3594 config->numRow = clabel->num_rows = 1;
3595 config->numCol = clabel->num_columns;
3596 config->numSpare = 0; /* XXX should this be set here? */
3597 config->sectPerSU = clabel->sectPerSU;
3598 config->SUsPerPU = clabel->SUsPerPU;
3599 config->SUsPerRU = clabel->SUsPerRU;
3600 config->parityConfig = clabel->parityConfig;
3601 /* XXX... */
3602 strcpy(config->diskQueueType,"fifo");
3603 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3604 config->layoutSpecificSize = 0; /* XXX ?? */
3605
3606 while(ac!=NULL) {
3607 /* row/col values will be in range due to the checks
3608 in reasonable_label() */
3609 strcpy(config->devnames[0][ac->clabel->column],
3610 ac->devname);
3611 ac = ac->next;
3612 }
3613
3614 for(i=0;i<RF_MAXDBGV;i++) {
3615 config->debugVars[i][0] = 0;
3616 }
3617 }
3618
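/*
 * Set the autoconfigure flag on the set and propagate it to the
 * component labels of all optimal disks and in-use spares.
 */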
3619 int
3620 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3621 {
3622 RF_ComponentLabel_t *clabel;
3623 int column;
3624 int sparecol;
3625
3626 raidPtr->autoconfigure = new_value;
3627
3628 for(column=0; column<raidPtr->numCol; column++) {
3629 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3630 clabel = raidget_component_label(raidPtr, column);
3631 clabel->autoconfigure = new_value;
3632 raidflush_component_label(raidPtr, column);
3633 }
3634 }
3635 for(column = 0; column < raidPtr->numSpare ; column++) {
3636 sparecol = raidPtr->numCol + column;
3637 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3638 clabel = raidget_component_label(raidPtr, sparecol);
3639 clabel->autoconfigure = new_value;
3640 raidflush_component_label(raidPtr, sparecol);
3641 }
3642 }
3643 return(new_value);
3644 }
3645
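/*
 * As rf_set_autoconfig(), but for the root_partition flag.
 */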
3646 int
3647 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3648 {
3649 RF_ComponentLabel_t *clabel;
3650 int column;
3651 int sparecol;
3652
3653 raidPtr->root_partition = new_value;
3654 for(column=0; column<raidPtr->numCol; column++) {
3655 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3656 clabel = raidget_component_label(raidPtr, column);
3657 clabel->root_partition = new_value;
3658 raidflush_component_label(raidPtr, column);
3659 }
3660 }
3661 for(column = 0; column < raidPtr->numSpare ; column++) {
3662 sparecol = raidPtr->numCol + column;
3663 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3664 clabel = raidget_component_label(raidPtr, sparecol);
3665 clabel->root_partition = new_value;
3666 raidflush_component_label(raidPtr, sparecol);
3667 }
3668 }
3669 return(new_value);
3670 }
3671
3672 void
3673 rf_release_all_vps(RF_ConfigSet_t *cset)
3674 {
3675 RF_AutoConfig_t *ac;
3676
3677 ac = cset->ac;
3678 while(ac!=NULL) {
3679 /* Close the vp, and give it back */
3680 if (ac->vp) {
3681 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3682 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3683 vput(ac->vp);
3684 ac->vp = NULL;
3685 }
3686 ac = ac->next;
3687 }
3688 }
3689
3690
3691 void
3692 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3693 {
3694 RF_AutoConfig_t *ac;
3695 RF_AutoConfig_t *next_ac;
3696
3697 ac = cset->ac;
3698 while(ac!=NULL) {
3699 next_ac = ac->next;
3700 /* nuke the label */
3701 free(ac->clabel, M_RAIDFRAME);
3702 /* cleanup the config structure */
3703 free(ac, M_RAIDFRAME);
3704 /* "next.." */
3705 ac = next_ac;
3706 }
3707 /* and, finally, nuke the config set */
3708 free(cset, M_RAIDFRAME);
3709 }
3710
3711
3712 void
3713 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3714 {
3715 /* current version number */
3716 clabel->version = RF_COMPONENT_LABEL_VERSION;
3717 clabel->serial_number = raidPtr->serial_number;
3718 clabel->mod_counter = raidPtr->mod_counter;
3719
3720 clabel->num_rows = 1;
3721 clabel->num_columns = raidPtr->numCol;
3722 clabel->clean = RF_RAID_DIRTY; /* not clean */
3723 clabel->status = rf_ds_optimal; /* "It's good!" */
3724
3725 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3726 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3727 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3728
3729 clabel->blockSize = raidPtr->bytesPerSector;
3730 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3731
3732 /* XXX not portable */
3733 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3734 clabel->maxOutstanding = raidPtr->maxOutstanding;
3735 clabel->autoconfigure = raidPtr->autoconfigure;
3736 clabel->root_partition = raidPtr->root_partition;
3737 clabel->last_unit = raidPtr->raidid;
3738 clabel->config_order = raidPtr->config_order;
3739
3740 #ifndef RF_NO_PARITY_MAP
3741 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3742 #endif
3743 }
3744
3745 struct raid_softc *
3746 rf_auto_config_set(RF_ConfigSet_t *cset)
3747 {
3748 RF_Raid_t *raidPtr;
3749 RF_Config_t *config;
3750 int raidID;
3751 struct raid_softc *sc;
3752
3753 #ifdef DEBUG
3754 printf("RAID autoconfigure\n");
3755 #endif
3756
3757 /* 1. Create a config structure */
3758 config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
3759 if (config == NULL) {
3760 printf("Out of mem!?!?\n");
3761 /* XXX do something more intelligent here. */
3762 return NULL;
3763 }
3764
3765 /*
3766 2. Figure out what RAID ID this one is supposed to live at
3767 See if we can get the same RAID dev that it was configured
3768 on last time..
3769 */
3770
3771 raidID = cset->ac->clabel->last_unit;
3772 for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
3773 continue;
3774 #ifdef DEBUG
3775 printf("Configuring raid%d:\n",raidID);
3776 #endif
3777
3778 raidPtr = &sc->sc_r;
3779
3780 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3781 raidPtr->softc = sc;
3782 raidPtr->raidid = raidID;
3783 raidPtr->openings = RAIDOUTSTANDING;
3784
3785 /* 3. Build the configuration structure */
3786 rf_create_configuration(cset->ac, config, raidPtr);
3787
3788 /* 4. Do the configuration */
3789 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3790 raidinit(sc);
3791
3792 rf_markalldirty(raidPtr);
3793 raidPtr->autoconfigure = 1; /* XXX do this here? */
3794 switch (cset->ac->clabel->root_partition) {
3795 case 1: /* Force Root */
3796 case 2: /* Soft Root: root when boot partition part of raid */
3797 /*
3798 * everything configured just fine. Make a note
3799 * that this set is eligible to be root,
3800 * or forced to be root
3801 */
3802 cset->rootable = cset->ac->clabel->root_partition;
3803 /* XXX do this here? */
3804 raidPtr->root_partition = cset->rootable;
3805 break;
3806 default:
3807 break;
3808 }
3809 } else {
3810 raidput(sc);
3811 sc = NULL;
3812 }
3813
3814 /* 5. Cleanup */
3815 free(config, M_RAIDFRAME);
3816 return sc;
3817 }
3818
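/*
 * Charge a completed access against the per-set disk statistics.
 */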
3819 void
3820 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3821 {
3822 struct buf *bp;
3823 struct raid_softc *rs;
3824
3825 bp = (struct buf *)desc->bp;
3826 rs = desc->raidPtr->softc;
3827 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3828 (bp->b_flags & B_READ));
3829 }
3830
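/*
 * Initialize a pool at IPL_BIO, priming it with xmin items and setting
 * the low/high water marks to xmin/xmax.
 */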
3831 void
3832 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3833 size_t xmin, size_t xmax)
3834 {
3835 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3836 pool_sethiwat(p, xmax);
3837 pool_prime(p, xmin);
3838 pool_setlowat(p, xmin);
3839 }
3840
3841 /*
 * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buf_queue to see
3843 * if there is IO pending and if that IO could possibly be done for a
3844 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3845 * otherwise.
3846 *
3847 */
3848
3849 int
3850 rf_buf_queue_check(RF_Raid_t *raidPtr)
3851 {
3852 struct raid_softc *rs = raidPtr->softc;
3853 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3854 /* there is work to do */
3855 return 0;
3856 }
3857 /* default is nothing to do */
3858 return 1;
3859 }
3860
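/*
 * Fetch the size of a component from its vnode and fill in the
 * RF_RaidDisk_t, reserving rf_protectedSectors at the front for the
 * component label area.
 */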
3861 int
3862 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3863 {
3864 uint64_t numsecs;
3865 unsigned secsize;
3866 int error;
3867
3868 error = getdisksize(vp, &numsecs, &secsize);
3869 if (error == 0) {
3870 diskPtr->blockSize = secsize;
3871 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3872 diskPtr->partitionSize = numsecs;
3873 return 0;
3874 }
3875 return error;
3876 }
3877
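/*
 * Autoconf glue for the raid pseudo-device: raid_match() always
 * succeeds and raid_attach() is a no-op, since the real setup is
 * done in raidinit().
 */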
3878 static int
3879 raid_match(device_t self, cfdata_t cfdata, void *aux)
3880 {
3881 return 1;
3882 }
3883
3884 static void
3885 raid_attach(device_t parent, device_t self, void *aux)
3886 {
3887
3888 }
3889
3890
3891 static int
3892 raid_detach(device_t self, int flags)
3893 {
3894 int error;
3895 struct raid_softc *rs = raidget(device_unit(self));
3896
3897 if (rs == NULL)
3898 return ENXIO;
3899
3900 if ((error = raidlock(rs)) != 0)
3901 return (error);
3902
3903 error = raid_detach_unlocked(rs);
3904
3905 raidunlock(rs);
3906
3907 /* XXXkd: raidput(rs) ??? */
3908
3909 return error;
3910 }
3911
3912 static void
3913 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3914 {
3915 struct disk_geom *dg = &rs->sc_dkdev.dk_geom;
3916
3917 memset(dg, 0, sizeof(*dg));
3918
3919 dg->dg_secperunit = raidPtr->totalSectors;
3920 dg->dg_secsize = raidPtr->bytesPerSector;
3921 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3922 dg->dg_ntracks = 4 * raidPtr->numCol;
3923
3924 disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
3925 }
3926
3927 /*
3928 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3929 * We end up returning whatever error was returned by the first cache flush
3930 * that fails.
3931 */
3932
3933 int
3934 rf_sync_component_caches(RF_Raid_t *raidPtr)
3935 {
3936 int c, sparecol;
3937 int e,error;
3938 int force = 1;
3939
3940 error = 0;
3941 for (c = 0; c < raidPtr->numCol; c++) {
3942 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3943 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3944 &force, FWRITE, NOCRED);
3945 if (e) {
3946 if (e != ENODEV)
3947 printf("raid%d: cache flush to component %s failed.\n",
3948 raidPtr->raidid, raidPtr->Disks[c].devname);
3949 if (error == 0) {
3950 error = e;
3951 }
3952 }
3953 }
3954 }
3955
3956 for( c = 0; c < raidPtr->numSpare ; c++) {
3957 sparecol = raidPtr->numCol + c;
3958 /* Need to ensure that the reconstruct actually completed! */
3959 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3960 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3961 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3962 if (e) {
3963 if (e != ENODEV)
3964 printf("raid%d: cache flush to component %s failed.\n",
3965 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3966 if (error == 0) {
3967 error = e;
3968 }
3969 }
3970 }
3971 }
3972 return error;
3973 }
3974