rf_netbsdkintf.c revision 1.323 1 /* $NetBSD: rf_netbsdkintf.c,v 1.323 2015/04/26 15:15:20 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.323 2015/04/26 15:15:20 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/*
 * Block-device switch for raid: wires the generic block-device entry
 * points (open/close/strategy/ioctl/dump/psize) to the raid* routines
 * defined in this file.  D_DISK marks the device as a disk.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
217
/*
 * Character-device (raw) switch for raid: read/write go through
 * physio() via raidread/raidwrite; unsupported operations use the
 * standard no*-stubs.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
232
/* Disk-driver glue used by disk_attach()/disklabel code for raid units. */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
237
/*
 * Per-unit software state for a RAIDframe device.  One exists for each
 * configured (or configuring) raid unit; all units are linked on the
 * global "raids" list, which is protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_unit;	/* raid unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* the RAIDframe state proper */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global "raids" list */
};
250 /* sc_flags */
251 #define RAIDF_INITED 0x01 /* unit has been initialized */
252 #define RAIDF_WLABEL 0x02 /* label area is writable */
253 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
254 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
255 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
256 #define RAIDF_LOCKED 0x80 /* unit is locked */
257
258 #define raidunit(x) DISKUNIT(x)
259
260 extern struct cfdriver raid_cd;
261 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
262 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
263 DVF_DETACH_SHUTDOWN);
264
265 /*
266 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
267 * Be aware that large numbers can allow the driver to consume a lot of
268 * kernel memory, especially on writes, and in degraded mode reads.
269 *
270 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
271 * a single 64K write will typically require 64K for the old data,
272 * 64K for the old parity, and 64K for the new parity, for a total
273 * of 192K (if the parity buffer is not re-used immediately).
274 * Even it if is used immediately, that's still 128K, which when multiplied
275 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
276 *
277 * Now in degraded mode, for example, a 64K read on the above setup may
278 * require data reconstruction, which will require *all* of the 4 remaining
279 * disks to participate -- 4 * 32K/disk == 128K again.
280 */
281
282 #ifndef RAIDOUTSTANDING
283 #define RAIDOUTSTANDING 6
284 #endif
285
286 #define RAIDLABELDEV(dev) \
287 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
288
289 /* declared here, and made public, for the benefit of KVM stuff.. */
290
291 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
292 struct disklabel *);
293 static void raidgetdisklabel(dev_t);
294 static void raidmakedisklabel(struct raid_softc *);
295
296 static int raidlock(struct raid_softc *);
297 static void raidunlock(struct raid_softc *);
298
299 static int raid_detach_unlocked(struct raid_softc *);
300
301 static void rf_markalldirty(RF_Raid_t *);
302 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(device_t);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
322 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
323
324 /*
325 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
326 * Note that this is overridden by having RAID_AUTOCONFIG as an option
327 * in the kernel config file.
328 */
329 #ifdef RAID_AUTOCONFIG
330 int raidautoconfig = 1;
331 #else
332 int raidautoconfig = 0;
333 #endif
334 static bool raidautoconfigdone = false;
335
336 struct RF_Pools_s rf_pools;
337
338 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
339 static kmutex_t raid_lock;
340
341 static struct raid_softc *
342 raidcreate(int unit) {
343 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
344 if (sc == NULL) {
345 #ifdef DIAGNOSTIC
346 printf("%s: out of memory\n", __func__);
347 #endif
348 return NULL;
349 }
350 sc->sc_unit = unit;
351 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
352 return sc;
353 }
354
/*
 * raiddestroy: free a softc previously allocated by raidcreate().
 * The caller must already have unlinked it from the global list.
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
360
361 static struct raid_softc *
362 raidget(int unit) {
363 struct raid_softc *sc;
364 if (unit < 0) {
365 #ifdef DIAGNOSTIC
366 panic("%s: unit %d!", __func__, unit);
367 #endif
368 return NULL;
369 }
370 mutex_enter(&raid_lock);
371 LIST_FOREACH(sc, &raids, sc_link) {
372 if (sc->sc_unit == unit) {
373 mutex_exit(&raid_lock);
374 return sc;
375 }
376 }
377 mutex_exit(&raid_lock);
378 if ((sc = raidcreate(unit)) == NULL)
379 return NULL;
380 mutex_enter(&raid_lock);
381 LIST_INSERT_HEAD(&raids, sc, sc_link);
382 mutex_exit(&raid_lock);
383 return sc;
384 }
385
/*
 * raidput: unlink a softc from the global list and destroy it.
 * Call only when the unit is no longer referenced.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
393
/*
 * raidattach: pseudo-device attach routine, called once at boot.
 * "num" is the requested device count from config(1) and is unused.
 * Initializes global driver state, boots the RAIDframe core, attaches
 * the autoconf glue, and registers a config finalizer so that RAID
 * sets are auto-configured after all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table plumbing used only by declustered parity layouts. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Bring up the RAIDframe engine; failure here is unrecoverable. */
	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
426
427 int
428 rf_autoconfig(device_t self)
429 {
430 RF_AutoConfig_t *ac_list;
431 RF_ConfigSet_t *config_sets;
432
433 if (!raidautoconfig || raidautoconfigdone == true)
434 return (0);
435
436 /* XXX This code can only be run once. */
437 raidautoconfigdone = true;
438
439 #ifdef __HAVE_CPU_BOOTCONF
440 /*
441 * 0. find the boot device if needed first so we can use it later
442 * this needs to be done before we autoconfigure any raid sets,
443 * because if we use wedges we are not going to be able to open
444 * the boot device later
445 */
446 if (booted_device == NULL)
447 cpu_bootconf();
448 #endif
449 /* 1. locate all RAID components on the system */
450 aprint_debug("Searching for RAID components...\n");
451 ac_list = rf_find_raid_components();
452
453 /* 2. Sort them into their respective sets. */
454 config_sets = rf_create_auto_sets(ac_list);
455
456 /*
457 * 3. Evaluate each set and configure the valid ones.
458 * This gets done in rf_buildroothack().
459 */
460 rf_buildroothack(config_sets);
461
462 return 1;
463 }
464
465 static int
466 rf_containsboot(RF_Raid_t *r, device_t bdv) {
467 const char *bootname = device_xname(bdv);
468 size_t len = strlen(bootname);
469
470 for (int col = 0; col < r->numCol; col++) {
471 const char *devname = r->Disks[col].devname;
472 devname += sizeof("/dev/") - 1;
473 if (strncmp(devname, "dk", 2) == 0) {
474 const char *parent =
475 dkwedge_get_parent_name(r->Disks[col].dev);
476 if (parent != NULL)
477 devname = parent;
478 }
479 if (strncmp(devname, bootname, len) == 0) {
480 struct raid_softc *sc = r->softc;
481 aprint_debug("raid%d includes boot device %s\n",
482 sc->sc_unit, devname);
483 return 1;
484 }
485 }
486 return 0;
487 }
488
/*
 * rf_buildroothack: walk the list of candidate config sets, configure
 * every set that has enough components and is marked for autoconfig,
 * and release the resources of the rest.  Then, unless the user
 * hardwired a root spec, decide whether one of the configured raid
 * sets should become the root device (booted_device).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* number of root-eligible sets found */
	struct raid_softc *sc, *rsc;	/* rsc: the root candidate */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; save the link now. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* Wedges in use: root is the wedge named "raidNa". */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Override root if nothing else was found, if the set is
		 * forced root (root_partition == 1), or if the set
		 * contains the device we actually booted from.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
599
600
601 int
602 raidsize(dev_t dev)
603 {
604 struct raid_softc *rs;
605 struct disklabel *lp;
606 int part, unit, omask, size;
607
608 unit = raidunit(dev);
609 if ((rs = raidget(unit)) == NULL)
610 return -1;
611 if ((rs->sc_flags & RAIDF_INITED) == 0)
612 return (-1);
613
614 part = DISKPART(dev);
615 omask = rs->sc_dkdev.dk_openmask & (1 << part);
616 lp = rs->sc_dkdev.dk_label;
617
618 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
619 return (-1);
620
621 if (lp->d_partitions[part].p_fstype != FS_SWAP)
622 size = -1;
623 else
624 size = lp->d_partitions[part].p_size *
625 (lp->d_secsize / DEV_BSIZE);
626
627 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
628 return (-1);
629
630 return (size);
631
632 }
633
634 int
635 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
636 {
637 int unit = raidunit(dev);
638 struct raid_softc *rs;
639 const struct bdevsw *bdev;
640 struct disklabel *lp;
641 RF_Raid_t *raidPtr;
642 daddr_t offset;
643 int part, c, sparecol, j, scol, dumpto;
644 int error = 0;
645
646 if ((rs = raidget(unit)) == NULL)
647 return ENXIO;
648
649 raidPtr = &rs->sc_r;
650
651 if ((rs->sc_flags & RAIDF_INITED) == 0)
652 return ENXIO;
653
654 /* we only support dumping to RAID 1 sets */
655 if (raidPtr->Layout.numDataCol != 1 ||
656 raidPtr->Layout.numParityCol != 1)
657 return EINVAL;
658
659
660 if ((error = raidlock(rs)) != 0)
661 return error;
662
663 if (size % DEV_BSIZE != 0) {
664 error = EINVAL;
665 goto out;
666 }
667
668 if (blkno + size / DEV_BSIZE > rs->sc_size) {
669 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
670 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
671 size / DEV_BSIZE, rs->sc_size);
672 error = EINVAL;
673 goto out;
674 }
675
676 part = DISKPART(dev);
677 lp = rs->sc_dkdev.dk_label;
678 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
679
680 /* figure out what device is alive.. */
681
682 /*
683 Look for a component to dump to. The preference for the
684 component to dump to is as follows:
685 1) the master
686 2) a used_spare of the master
687 3) the slave
688 4) a used_spare of the slave
689 */
690
691 dumpto = -1;
692 for (c = 0; c < raidPtr->numCol; c++) {
693 if (raidPtr->Disks[c].status == rf_ds_optimal) {
694 /* this might be the one */
695 dumpto = c;
696 break;
697 }
698 }
699
700 /*
701 At this point we have possibly selected a live master or a
702 live slave. We now check to see if there is a spared
703 master (or a spared slave), if we didn't find a live master
704 or a live slave.
705 */
706
707 for (c = 0; c < raidPtr->numSpare; c++) {
708 sparecol = raidPtr->numCol + c;
709 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
710 /* How about this one? */
711 scol = -1;
712 for(j=0;j<raidPtr->numCol;j++) {
713 if (raidPtr->Disks[j].spareCol == sparecol) {
714 scol = j;
715 break;
716 }
717 }
718 if (scol == 0) {
719 /*
720 We must have found a spared master!
721 We'll take that over anything else
722 found so far. (We couldn't have
723 found a real master before, since
724 this is a used spare, and it's
725 saying that it's replacing the
726 master.) On reboot (with
727 autoconfiguration turned on)
728 sparecol will become the 1st
729 component (component0) of this set.
730 */
731 dumpto = sparecol;
732 break;
733 } else if (scol != -1) {
734 /*
735 Must be a spared slave. We'll dump
736 to that if we havn't found anything
737 else so far.
738 */
739 if (dumpto == -1)
740 dumpto = sparecol;
741 }
742 }
743 }
744
745 if (dumpto == -1) {
746 /* we couldn't find any live components to dump to!?!?
747 */
748 error = EINVAL;
749 goto out;
750 }
751
752 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
753
754 /*
755 Note that blkno is relative to this particular partition.
756 By adding the offset of this partition in the RAID
757 set, and also adding RF_PROTECTED_SECTORS, we get a
758 value that is relative to the partition used for the
759 underlying component.
760 */
761
762 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
763 blkno + offset, va, size);
764
765 out:
766 raidunlock(rs);
767
768 return error;
769 }
/*
 * raidopen: open the raid device (block or character, per "fmt").
 * Validates the requested partition, reads the disklabel on first
 * open, records the open in the per-mode open masks, and marks all
 * components dirty on the first open of a configured set.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens once shutdown of the unit has begun. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured, wedge-less unit: (re)read the label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	/* NB: "bad" is also reached on success; it merely drops the lock. */
bad:
	raidunlock(rs);

	return (error);


}
/*
 * raidclose: close the raid device.  Clears the partition's bit in the
 * per-mode open mask; on the last close of a configured set, writes
 * out final ("clean") component labels.  Always returns 0 (a failure
 * to take the unit lock is returned directly by raidlock()).
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
904
/*
 * raidstrategy: block-I/O entry point.  Validates the request, bounds
 * checks it against the media size (raw partition) or the disklabel,
 * then queues the buf for the per-unit I/O thread and wakes it via
 * iodone_cv.  On any early error the buf is completed immediately with
 * b_resid == b_bcount.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfers succeed trivially (b_error stays 0). */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sectors) to DEV_BSIZE units. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
			    (int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	/* iodone_lock protects the queue hand-off to the I/O thread. */
	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
975 /* ARGSUSED */
976 int
977 raidread(dev_t dev, struct uio *uio, int flags)
978 {
979 int unit = raidunit(dev);
980 struct raid_softc *rs;
981
982 if ((rs = raidget(unit)) == NULL)
983 return ENXIO;
984
985 if ((rs->sc_flags & RAIDF_INITED) == 0)
986 return (ENXIO);
987
988 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
989
990 }
991 /* ARGSUSED */
992 int
993 raidwrite(dev_t dev, struct uio *uio, int flags)
994 {
995 int unit = raidunit(dev);
996 struct raid_softc *rs;
997
998 if ((rs = raidget(unit)) == NULL)
999 return ENXIO;
1000
1001 if ((rs->sc_flags & RAIDF_INITED) == 0)
1002 return (ENXIO);
1003
1004 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1005
1006 }
1007
1008 static int
1009 raid_detach_unlocked(struct raid_softc *rs)
1010 {
1011 int error;
1012 RF_Raid_t *raidPtr;
1013
1014 raidPtr = &rs->sc_r;
1015
1016 /*
1017 * If somebody has a partition mounted, we shouldn't
1018 * shutdown.
1019 */
1020 if (rs->sc_dkdev.dk_openmask != 0)
1021 return EBUSY;
1022
1023 if ((rs->sc_flags & RAIDF_INITED) == 0)
1024 ; /* not initialized: nothing to do */
1025 else if ((error = rf_Shutdown(raidPtr)) != 0)
1026 return error;
1027 else
1028 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
1029
1030 /* Detach the disk. */
1031 dkwedge_delall(&rs->sc_dkdev);
1032 disk_detach(&rs->sc_dkdev);
1033 disk_destroy(&rs->sc_dkdev);
1034
1035 aprint_normal_dev(rs->sc_dev, "detached\n");
1036
1037 return 0;
1038 }
1039
1040 int
1041 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1042 {
1043 int unit = raidunit(dev);
1044 int error = 0;
1045 int part, pmask, s;
1046 cfdata_t cf;
1047 struct raid_softc *rs;
1048 RF_Config_t *k_cfg, *u_cfg;
1049 RF_Raid_t *raidPtr;
1050 RF_RaidDisk_t *diskPtr;
1051 RF_AccTotals_t *totals;
1052 RF_DeviceConfig_t *d_cfg, **ucfgp;
1053 u_char *specific_buf;
1054 int retcode = 0;
1055 int column;
1056 /* int raidid; */
1057 struct rf_recon_req *rrcopy, *rr;
1058 RF_ComponentLabel_t *clabel;
1059 RF_ComponentLabel_t *ci_label;
1060 RF_ComponentLabel_t **clabel_ptr;
1061 RF_SingleComponent_t *sparePtr,*componentPtr;
1062 RF_SingleComponent_t component;
1063 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1064 int i, j, d;
1065 #ifdef __HAVE_OLD_DISKLABEL
1066 struct disklabel newlabel;
1067 #endif
1068
1069 if ((rs = raidget(unit)) == NULL)
1070 return ENXIO;
1071 raidPtr = &rs->sc_r;
1072
1073 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1074 (int) DISKPART(dev), (int) unit, cmd));
1075
1076 /* Must be open for writes for these commands... */
1077 switch (cmd) {
1078 #ifdef DIOCGSECTORSIZE
1079 case DIOCGSECTORSIZE:
1080 *(u_int *)data = raidPtr->bytesPerSector;
1081 return 0;
1082 case DIOCGMEDIASIZE:
1083 *(off_t *)data =
1084 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1085 return 0;
1086 #endif
1087 case DIOCSDINFO:
1088 case DIOCWDINFO:
1089 #ifdef __HAVE_OLD_DISKLABEL
1090 case ODIOCWDINFO:
1091 case ODIOCSDINFO:
1092 #endif
1093 case DIOCWLABEL:
1094 case DIOCAWEDGE:
1095 case DIOCDWEDGE:
1096 case DIOCMWEDGES:
1097 case DIOCSSTRATEGY:
1098 if ((flag & FWRITE) == 0)
1099 return (EBADF);
1100 }
1101
1102 /* Must be initialized for these... */
1103 switch (cmd) {
1104 case DIOCGDINFO:
1105 case DIOCSDINFO:
1106 case DIOCWDINFO:
1107 #ifdef __HAVE_OLD_DISKLABEL
1108 case ODIOCGDINFO:
1109 case ODIOCWDINFO:
1110 case ODIOCSDINFO:
1111 case ODIOCGDEFLABEL:
1112 #endif
1113 case DIOCGPART:
1114 case DIOCWLABEL:
1115 case DIOCGDEFLABEL:
1116 case DIOCAWEDGE:
1117 case DIOCDWEDGE:
1118 case DIOCLWEDGES:
1119 case DIOCMWEDGES:
1120 case DIOCCACHESYNC:
1121 case RAIDFRAME_SHUTDOWN:
1122 case RAIDFRAME_REWRITEPARITY:
1123 case RAIDFRAME_GET_INFO:
1124 case RAIDFRAME_RESET_ACCTOTALS:
1125 case RAIDFRAME_GET_ACCTOTALS:
1126 case RAIDFRAME_KEEP_ACCTOTALS:
1127 case RAIDFRAME_GET_SIZE:
1128 case RAIDFRAME_FAIL_DISK:
1129 case RAIDFRAME_COPYBACK:
1130 case RAIDFRAME_CHECK_RECON_STATUS:
1131 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1132 case RAIDFRAME_GET_COMPONENT_LABEL:
1133 case RAIDFRAME_SET_COMPONENT_LABEL:
1134 case RAIDFRAME_ADD_HOT_SPARE:
1135 case RAIDFRAME_REMOVE_HOT_SPARE:
1136 case RAIDFRAME_INIT_LABELS:
1137 case RAIDFRAME_REBUILD_IN_PLACE:
1138 case RAIDFRAME_CHECK_PARITY:
1139 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1140 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1141 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1142 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1143 case RAIDFRAME_SET_AUTOCONFIG:
1144 case RAIDFRAME_SET_ROOT:
1145 case RAIDFRAME_DELETE_COMPONENT:
1146 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1147 case RAIDFRAME_PARITYMAP_STATUS:
1148 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1149 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1150 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1151 case DIOCGSTRATEGY:
1152 case DIOCSSTRATEGY:
1153 if ((rs->sc_flags & RAIDF_INITED) == 0)
1154 return (ENXIO);
1155 }
1156
1157 switch (cmd) {
1158 #ifdef COMPAT_50
1159 case RAIDFRAME_GET_INFO50:
1160 return rf_get_info50(raidPtr, data);
1161
1162 case RAIDFRAME_CONFIGURE50:
1163 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1164 return retcode;
1165 goto config;
1166 #endif
1167 /* configure the system */
1168 case RAIDFRAME_CONFIGURE:
1169
1170 if (raidPtr->valid) {
1171 /* There is a valid RAID set running on this unit! */
1172 printf("raid%d: Device already configured!\n",unit);
1173 return(EINVAL);
1174 }
1175
1176 /* copy-in the configuration information */
1177 /* data points to a pointer to the configuration structure */
1178
1179 u_cfg = *((RF_Config_t **) data);
1180 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1181 if (k_cfg == NULL) {
1182 return (ENOMEM);
1183 }
1184 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1185 if (retcode) {
1186 RF_Free(k_cfg, sizeof(RF_Config_t));
1187 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1188 retcode));
1189 return (retcode);
1190 }
1191 goto config;
1192 config:
1193 /* allocate a buffer for the layout-specific data, and copy it
1194 * in */
1195 if (k_cfg->layoutSpecificSize) {
1196 if (k_cfg->layoutSpecificSize > 10000) {
1197 /* sanity check */
1198 RF_Free(k_cfg, sizeof(RF_Config_t));
1199 return (EINVAL);
1200 }
1201 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1202 (u_char *));
1203 if (specific_buf == NULL) {
1204 RF_Free(k_cfg, sizeof(RF_Config_t));
1205 return (ENOMEM);
1206 }
1207 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1208 k_cfg->layoutSpecificSize);
1209 if (retcode) {
1210 RF_Free(k_cfg, sizeof(RF_Config_t));
1211 RF_Free(specific_buf,
1212 k_cfg->layoutSpecificSize);
1213 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1214 retcode));
1215 return (retcode);
1216 }
1217 } else
1218 specific_buf = NULL;
1219 k_cfg->layoutSpecific = specific_buf;
1220
1221 /* should do some kind of sanity check on the configuration.
1222 * Store the sum of all the bytes in the last byte? */
1223
1224 /* configure the system */
1225
1226 /*
1227 * Clear the entire RAID descriptor, just to make sure
1228 * there is no stale data left in the case of a
1229 * reconfiguration
1230 */
1231 memset(raidPtr, 0, sizeof(*raidPtr));
1232 raidPtr->softc = rs;
1233 raidPtr->raidid = unit;
1234
1235 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1236
1237 if (retcode == 0) {
1238
1239 /* allow this many simultaneous IO's to
1240 this RAID device */
1241 raidPtr->openings = RAIDOUTSTANDING;
1242
1243 raidinit(rs);
1244 rf_markalldirty(raidPtr);
1245 }
1246 /* free the buffers. No return code here. */
1247 if (k_cfg->layoutSpecificSize) {
1248 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1249 }
1250 RF_Free(k_cfg, sizeof(RF_Config_t));
1251
1252 return (retcode);
1253
1254 /* shutdown the system */
1255 case RAIDFRAME_SHUTDOWN:
1256
1257 part = DISKPART(dev);
1258 pmask = (1 << part);
1259
1260 if ((error = raidlock(rs)) != 0)
1261 return (error);
1262
1263 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1264 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1265 (rs->sc_dkdev.dk_copenmask & pmask)))
1266 retcode = EBUSY;
1267 else {
1268 rs->sc_flags |= RAIDF_SHUTDOWN;
1269 rs->sc_dkdev.dk_copenmask &= ~pmask;
1270 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1271 rs->sc_dkdev.dk_openmask &= ~pmask;
1272 retcode = 0;
1273 }
1274
1275 raidunlock(rs);
1276
1277 if (retcode != 0)
1278 return retcode;
1279
1280 /* free the pseudo device attach bits */
1281
1282 cf = device_cfdata(rs->sc_dev);
1283 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1284 free(cf, M_RAIDFRAME);
1285
1286 return (retcode);
1287 case RAIDFRAME_GET_COMPONENT_LABEL:
1288 clabel_ptr = (RF_ComponentLabel_t **) data;
1289 /* need to read the component label for the disk indicated
1290 by row,column in clabel */
1291
1292 /*
1293 * Perhaps there should be an option to skip the in-core
1294 * copy and hit the disk, as with disklabel(8).
1295 */
1296 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1297
1298 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1299
1300 if (retcode) {
1301 RF_Free(clabel, sizeof(*clabel));
1302 return retcode;
1303 }
1304
1305 clabel->row = 0; /* Don't allow looking at anything else.*/
1306
1307 column = clabel->column;
1308
1309 if ((column < 0) || (column >= raidPtr->numCol +
1310 raidPtr->numSpare)) {
1311 RF_Free(clabel, sizeof(*clabel));
1312 return EINVAL;
1313 }
1314
1315 RF_Free(clabel, sizeof(*clabel));
1316
1317 clabel = raidget_component_label(raidPtr, column);
1318
1319 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1320
1321 #if 0
1322 case RAIDFRAME_SET_COMPONENT_LABEL:
1323 clabel = (RF_ComponentLabel_t *) data;
1324
1325 /* XXX check the label for valid stuff... */
1326 /* Note that some things *should not* get modified --
1327 the user should be re-initing the labels instead of
1328 trying to patch things.
1329 */
1330
1331 raidid = raidPtr->raidid;
1332 #ifdef DEBUG
1333 printf("raid%d: Got component label:\n", raidid);
1334 printf("raid%d: Version: %d\n", raidid, clabel->version);
1335 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1336 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1337 printf("raid%d: Column: %d\n", raidid, clabel->column);
1338 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1339 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1340 printf("raid%d: Status: %d\n", raidid, clabel->status);
1341 #endif
1342 clabel->row = 0;
1343 column = clabel->column;
1344
1345 if ((column < 0) || (column >= raidPtr->numCol)) {
1346 return(EINVAL);
1347 }
1348
1349 /* XXX this isn't allowed to do anything for now :-) */
1350
1351 /* XXX and before it is, we need to fill in the rest
1352 of the fields!?!?!?! */
1353 memcpy(raidget_component_label(raidPtr, column),
1354 clabel, sizeof(*clabel));
1355 raidflush_component_label(raidPtr, column);
1356 return (0);
1357 #endif
1358
1359 case RAIDFRAME_INIT_LABELS:
1360 clabel = (RF_ComponentLabel_t *) data;
1361 /*
1362 we only want the serial number from
1363 the above. We get all the rest of the information
1364 from the config that was used to create this RAID
1365 set.
1366 */
1367
1368 raidPtr->serial_number = clabel->serial_number;
1369
1370 for(column=0;column<raidPtr->numCol;column++) {
1371 diskPtr = &raidPtr->Disks[column];
1372 if (!RF_DEAD_DISK(diskPtr->status)) {
1373 ci_label = raidget_component_label(raidPtr,
1374 column);
1375 /* Zeroing this is important. */
1376 memset(ci_label, 0, sizeof(*ci_label));
1377 raid_init_component_label(raidPtr, ci_label);
1378 ci_label->serial_number =
1379 raidPtr->serial_number;
1380 ci_label->row = 0; /* we dont' pretend to support more */
1381 rf_component_label_set_partitionsize(ci_label,
1382 diskPtr->partitionSize);
1383 ci_label->column = column;
1384 raidflush_component_label(raidPtr, column);
1385 }
1386 /* XXXjld what about the spares? */
1387 }
1388
1389 return (retcode);
1390 case RAIDFRAME_SET_AUTOCONFIG:
1391 d = rf_set_autoconfig(raidPtr, *(int *) data);
1392 printf("raid%d: New autoconfig value is: %d\n",
1393 raidPtr->raidid, d);
1394 *(int *) data = d;
1395 return (retcode);
1396
1397 case RAIDFRAME_SET_ROOT:
1398 d = rf_set_rootpartition(raidPtr, *(int *) data);
1399 printf("raid%d: New rootpartition value is: %d\n",
1400 raidPtr->raidid, d);
1401 *(int *) data = d;
1402 return (retcode);
1403
1404 /* initialize all parity */
1405 case RAIDFRAME_REWRITEPARITY:
1406
1407 if (raidPtr->Layout.map->faultsTolerated == 0) {
1408 /* Parity for RAID 0 is trivially correct */
1409 raidPtr->parity_good = RF_RAID_CLEAN;
1410 return(0);
1411 }
1412
1413 if (raidPtr->parity_rewrite_in_progress == 1) {
1414 /* Re-write is already in progress! */
1415 return(EINVAL);
1416 }
1417
1418 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1419 rf_RewriteParityThread,
1420 raidPtr,"raid_parity");
1421 return (retcode);
1422
1423
1424 case RAIDFRAME_ADD_HOT_SPARE:
1425 sparePtr = (RF_SingleComponent_t *) data;
1426 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1427 retcode = rf_add_hot_spare(raidPtr, &component);
1428 return(retcode);
1429
1430 case RAIDFRAME_REMOVE_HOT_SPARE:
1431 return(retcode);
1432
1433 case RAIDFRAME_DELETE_COMPONENT:
1434 componentPtr = (RF_SingleComponent_t *)data;
1435 memcpy( &component, componentPtr,
1436 sizeof(RF_SingleComponent_t));
1437 retcode = rf_delete_component(raidPtr, &component);
1438 return(retcode);
1439
1440 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1441 componentPtr = (RF_SingleComponent_t *)data;
1442 memcpy( &component, componentPtr,
1443 sizeof(RF_SingleComponent_t));
1444 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1445 return(retcode);
1446
1447 case RAIDFRAME_REBUILD_IN_PLACE:
1448
1449 if (raidPtr->Layout.map->faultsTolerated == 0) {
1450 /* Can't do this on a RAID 0!! */
1451 return(EINVAL);
1452 }
1453
1454 if (raidPtr->recon_in_progress == 1) {
1455 /* a reconstruct is already in progress! */
1456 return(EINVAL);
1457 }
1458
1459 componentPtr = (RF_SingleComponent_t *) data;
1460 memcpy( &component, componentPtr,
1461 sizeof(RF_SingleComponent_t));
1462 component.row = 0; /* we don't support any more */
1463 column = component.column;
1464
1465 if ((column < 0) || (column >= raidPtr->numCol)) {
1466 return(EINVAL);
1467 }
1468
1469 rf_lock_mutex2(raidPtr->mutex);
1470 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1471 (raidPtr->numFailures > 0)) {
1472 /* XXX 0 above shouldn't be constant!!! */
1473 /* some component other than this has failed.
1474 Let's not make things worse than they already
1475 are... */
1476 printf("raid%d: Unable to reconstruct to disk at:\n",
1477 raidPtr->raidid);
1478 printf("raid%d: Col: %d Too many failures.\n",
1479 raidPtr->raidid, column);
1480 rf_unlock_mutex2(raidPtr->mutex);
1481 return (EINVAL);
1482 }
1483 if (raidPtr->Disks[column].status ==
1484 rf_ds_reconstructing) {
1485 printf("raid%d: Unable to reconstruct to disk at:\n",
1486 raidPtr->raidid);
1487 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1488
1489 rf_unlock_mutex2(raidPtr->mutex);
1490 return (EINVAL);
1491 }
1492 if (raidPtr->Disks[column].status == rf_ds_spared) {
1493 rf_unlock_mutex2(raidPtr->mutex);
1494 return (EINVAL);
1495 }
1496 rf_unlock_mutex2(raidPtr->mutex);
1497
1498 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1499 if (rrcopy == NULL)
1500 return(ENOMEM);
1501
1502 rrcopy->raidPtr = (void *) raidPtr;
1503 rrcopy->col = column;
1504
1505 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1506 rf_ReconstructInPlaceThread,
1507 rrcopy,"raid_reconip");
1508 return(retcode);
1509
1510 case RAIDFRAME_GET_INFO:
1511 if (!raidPtr->valid)
1512 return (ENODEV);
1513 ucfgp = (RF_DeviceConfig_t **) data;
1514 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1515 (RF_DeviceConfig_t *));
1516 if (d_cfg == NULL)
1517 return (ENOMEM);
1518 d_cfg->rows = 1; /* there is only 1 row now */
1519 d_cfg->cols = raidPtr->numCol;
1520 d_cfg->ndevs = raidPtr->numCol;
1521 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1522 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1523 return (ENOMEM);
1524 }
1525 d_cfg->nspares = raidPtr->numSpare;
1526 if (d_cfg->nspares >= RF_MAX_DISKS) {
1527 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1528 return (ENOMEM);
1529 }
1530 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1531 d = 0;
1532 for (j = 0; j < d_cfg->cols; j++) {
1533 d_cfg->devs[d] = raidPtr->Disks[j];
1534 d++;
1535 }
1536 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1537 d_cfg->spares[i] = raidPtr->Disks[j];
1538 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1539 /* XXX: raidctl(8) expects to see this as a used spare */
1540 d_cfg->spares[i].status = rf_ds_used_spare;
1541 }
1542 }
1543 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1544 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1545
1546 return (retcode);
1547
1548 case RAIDFRAME_CHECK_PARITY:
1549 *(int *) data = raidPtr->parity_good;
1550 return (0);
1551
1552 case RAIDFRAME_PARITYMAP_STATUS:
1553 if (rf_paritymap_ineligible(raidPtr))
1554 return EINVAL;
1555 rf_paritymap_status(raidPtr->parity_map,
1556 (struct rf_pmstat *)data);
1557 return 0;
1558
1559 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1560 if (rf_paritymap_ineligible(raidPtr))
1561 return EINVAL;
1562 if (raidPtr->parity_map == NULL)
1563 return ENOENT; /* ??? */
1564 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1565 (struct rf_pmparams *)data, 1))
1566 return EINVAL;
1567 return 0;
1568
1569 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1570 if (rf_paritymap_ineligible(raidPtr))
1571 return EINVAL;
1572 *(int *) data = rf_paritymap_get_disable(raidPtr);
1573 return 0;
1574
1575 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1576 if (rf_paritymap_ineligible(raidPtr))
1577 return EINVAL;
1578 rf_paritymap_set_disable(raidPtr, *(int *)data);
1579 /* XXX should errors be passed up? */
1580 return 0;
1581
1582 case RAIDFRAME_RESET_ACCTOTALS:
1583 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1584 return (0);
1585
1586 case RAIDFRAME_GET_ACCTOTALS:
1587 totals = (RF_AccTotals_t *) data;
1588 *totals = raidPtr->acc_totals;
1589 return (0);
1590
1591 case RAIDFRAME_KEEP_ACCTOTALS:
1592 raidPtr->keep_acc_totals = *(int *)data;
1593 return (0);
1594
1595 case RAIDFRAME_GET_SIZE:
1596 *(int *) data = raidPtr->totalSectors;
1597 return (0);
1598
1599 /* fail a disk & optionally start reconstruction */
1600 case RAIDFRAME_FAIL_DISK:
1601
1602 if (raidPtr->Layout.map->faultsTolerated == 0) {
1603 /* Can't do this on a RAID 0!! */
1604 return(EINVAL);
1605 }
1606
1607 rr = (struct rf_recon_req *) data;
1608 rr->row = 0;
1609 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1610 return (EINVAL);
1611
1612
1613 rf_lock_mutex2(raidPtr->mutex);
1614 if (raidPtr->status == rf_rs_reconstructing) {
1615 /* you can't fail a disk while we're reconstructing! */
1616 /* XXX wrong for RAID6 */
1617 rf_unlock_mutex2(raidPtr->mutex);
1618 return (EINVAL);
1619 }
1620 if ((raidPtr->Disks[rr->col].status ==
1621 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1622 /* some other component has failed. Let's not make
1623 things worse. XXX wrong for RAID6 */
1624 rf_unlock_mutex2(raidPtr->mutex);
1625 return (EINVAL);
1626 }
1627 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1628 /* Can't fail a spared disk! */
1629 rf_unlock_mutex2(raidPtr->mutex);
1630 return (EINVAL);
1631 }
1632 rf_unlock_mutex2(raidPtr->mutex);
1633
1634 /* make a copy of the recon request so that we don't rely on
1635 * the user's buffer */
1636 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1637 if (rrcopy == NULL)
1638 return(ENOMEM);
1639 memcpy(rrcopy, rr, sizeof(*rr));
1640 rrcopy->raidPtr = (void *) raidPtr;
1641
1642 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1643 rf_ReconThread,
1644 rrcopy,"raid_recon");
1645 return (0);
1646
1647 /* invoke a copyback operation after recon on whatever disk
1648 * needs it, if any */
1649 case RAIDFRAME_COPYBACK:
1650
1651 if (raidPtr->Layout.map->faultsTolerated == 0) {
1652 /* This makes no sense on a RAID 0!! */
1653 return(EINVAL);
1654 }
1655
1656 if (raidPtr->copyback_in_progress == 1) {
1657 /* Copyback is already in progress! */
1658 return(EINVAL);
1659 }
1660
1661 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1662 rf_CopybackThread,
1663 raidPtr,"raid_copyback");
1664 return (retcode);
1665
1666 /* return the percentage completion of reconstruction */
1667 case RAIDFRAME_CHECK_RECON_STATUS:
1668 if (raidPtr->Layout.map->faultsTolerated == 0) {
1669 /* This makes no sense on a RAID 0, so tell the
1670 user it's done. */
1671 *(int *) data = 100;
1672 return(0);
1673 }
1674 if (raidPtr->status != rf_rs_reconstructing)
1675 *(int *) data = 100;
1676 else {
1677 if (raidPtr->reconControl->numRUsTotal > 0) {
1678 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1679 } else {
1680 *(int *) data = 0;
1681 }
1682 }
1683 return (0);
1684 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1685 progressInfoPtr = (RF_ProgressInfo_t **) data;
1686 if (raidPtr->status != rf_rs_reconstructing) {
1687 progressInfo.remaining = 0;
1688 progressInfo.completed = 100;
1689 progressInfo.total = 100;
1690 } else {
1691 progressInfo.total =
1692 raidPtr->reconControl->numRUsTotal;
1693 progressInfo.completed =
1694 raidPtr->reconControl->numRUsComplete;
1695 progressInfo.remaining = progressInfo.total -
1696 progressInfo.completed;
1697 }
1698 retcode = copyout(&progressInfo, *progressInfoPtr,
1699 sizeof(RF_ProgressInfo_t));
1700 return (retcode);
1701
1702 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1703 if (raidPtr->Layout.map->faultsTolerated == 0) {
1704 /* This makes no sense on a RAID 0, so tell the
1705 user it's done. */
1706 *(int *) data = 100;
1707 return(0);
1708 }
1709 if (raidPtr->parity_rewrite_in_progress == 1) {
1710 *(int *) data = 100 *
1711 raidPtr->parity_rewrite_stripes_done /
1712 raidPtr->Layout.numStripe;
1713 } else {
1714 *(int *) data = 100;
1715 }
1716 return (0);
1717
1718 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1719 progressInfoPtr = (RF_ProgressInfo_t **) data;
1720 if (raidPtr->parity_rewrite_in_progress == 1) {
1721 progressInfo.total = raidPtr->Layout.numStripe;
1722 progressInfo.completed =
1723 raidPtr->parity_rewrite_stripes_done;
1724 progressInfo.remaining = progressInfo.total -
1725 progressInfo.completed;
1726 } else {
1727 progressInfo.remaining = 0;
1728 progressInfo.completed = 100;
1729 progressInfo.total = 100;
1730 }
1731 retcode = copyout(&progressInfo, *progressInfoPtr,
1732 sizeof(RF_ProgressInfo_t));
1733 return (retcode);
1734
1735 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1736 if (raidPtr->Layout.map->faultsTolerated == 0) {
1737 /* This makes no sense on a RAID 0 */
1738 *(int *) data = 100;
1739 return(0);
1740 }
1741 if (raidPtr->copyback_in_progress == 1) {
1742 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1743 raidPtr->Layout.numStripe;
1744 } else {
1745 *(int *) data = 100;
1746 }
1747 return (0);
1748
1749 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1750 progressInfoPtr = (RF_ProgressInfo_t **) data;
1751 if (raidPtr->copyback_in_progress == 1) {
1752 progressInfo.total = raidPtr->Layout.numStripe;
1753 progressInfo.completed =
1754 raidPtr->copyback_stripes_done;
1755 progressInfo.remaining = progressInfo.total -
1756 progressInfo.completed;
1757 } else {
1758 progressInfo.remaining = 0;
1759 progressInfo.completed = 100;
1760 progressInfo.total = 100;
1761 }
1762 retcode = copyout(&progressInfo, *progressInfoPtr,
1763 sizeof(RF_ProgressInfo_t));
1764 return (retcode);
1765
1766 /* the sparetable daemon calls this to wait for the kernel to
1767 * need a spare table. this ioctl does not return until a
1768 * spare table is needed. XXX -- calling mpsleep here in the
1769 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1770 * -- I should either compute the spare table in the kernel,
1771 * or have a different -- XXX XXX -- interface (a different
1772 * character device) for delivering the table -- XXX */
1773 #if 0
1774 case RAIDFRAME_SPARET_WAIT:
1775 rf_lock_mutex2(rf_sparet_wait_mutex);
1776 while (!rf_sparet_wait_queue)
1777 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1778 waitreq = rf_sparet_wait_queue;
1779 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1780 rf_unlock_mutex2(rf_sparet_wait_mutex);
1781
1782 /* structure assignment */
1783 *((RF_SparetWait_t *) data) = *waitreq;
1784
1785 RF_Free(waitreq, sizeof(*waitreq));
1786 return (0);
1787
1788 /* wakes up a process waiting on SPARET_WAIT and puts an error
1789 * code in it that will cause the dameon to exit */
1790 case RAIDFRAME_ABORT_SPARET_WAIT:
1791 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1792 waitreq->fcol = -1;
1793 rf_lock_mutex2(rf_sparet_wait_mutex);
1794 waitreq->next = rf_sparet_wait_queue;
1795 rf_sparet_wait_queue = waitreq;
1796 rf_broadcast_conf2(rf_sparet_wait_cv);
1797 rf_unlock_mutex2(rf_sparet_wait_mutex);
1798 return (0);
1799
1800 /* used by the spare table daemon to deliver a spare table
1801 * into the kernel */
1802 case RAIDFRAME_SEND_SPARET:
1803
1804 /* install the spare table */
1805 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1806
1807 /* respond to the requestor. the return status of the spare
1808 * table installation is passed in the "fcol" field */
1809 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1810 waitreq->fcol = retcode;
1811 rf_lock_mutex2(rf_sparet_wait_mutex);
1812 waitreq->next = rf_sparet_resp_queue;
1813 rf_sparet_resp_queue = waitreq;
1814 rf_broadcast_cond2(rf_sparet_resp_cv);
1815 rf_unlock_mutex2(rf_sparet_wait_mutex);
1816
1817 return (retcode);
1818 #endif
1819
1820 default:
1821 break; /* fall through to the os-specific code below */
1822
1823 }
1824
1825 if (!raidPtr->valid)
1826 return (EINVAL);
1827
1828 /*
1829 * Add support for "regular" device ioctls here.
1830 */
1831
1832 error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
1833 if (error != EPASSTHROUGH)
1834 return (error);
1835
1836 switch (cmd) {
1837 case DIOCWDINFO:
1838 case DIOCSDINFO:
1839 #ifdef __HAVE_OLD_DISKLABEL
1840 case ODIOCWDINFO:
1841 case ODIOCSDINFO:
1842 #endif
1843 {
1844 struct disklabel *lp;
1845 #ifdef __HAVE_OLD_DISKLABEL
1846 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1847 memset(&newlabel, 0, sizeof newlabel);
1848 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1849 lp = &newlabel;
1850 } else
1851 #endif
1852 lp = (struct disklabel *)data;
1853
1854 if ((error = raidlock(rs)) != 0)
1855 return (error);
1856
1857 rs->sc_flags |= RAIDF_LABELLING;
1858
1859 error = setdisklabel(rs->sc_dkdev.dk_label,
1860 lp, 0, rs->sc_dkdev.dk_cpulabel);
1861 if (error == 0) {
1862 if (cmd == DIOCWDINFO
1863 #ifdef __HAVE_OLD_DISKLABEL
1864 || cmd == ODIOCWDINFO
1865 #endif
1866 )
1867 error = writedisklabel(RAIDLABELDEV(dev),
1868 raidstrategy, rs->sc_dkdev.dk_label,
1869 rs->sc_dkdev.dk_cpulabel);
1870 }
1871 rs->sc_flags &= ~RAIDF_LABELLING;
1872
1873 raidunlock(rs);
1874
1875 if (error)
1876 return (error);
1877 break;
1878 }
1879
1880 case DIOCWLABEL:
1881 if (*(int *) data != 0)
1882 rs->sc_flags |= RAIDF_WLABEL;
1883 else
1884 rs->sc_flags &= ~RAIDF_WLABEL;
1885 break;
1886
1887 case DIOCGDEFLABEL:
1888 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1889 break;
1890
1891 #ifdef __HAVE_OLD_DISKLABEL
1892 case ODIOCGDEFLABEL:
1893 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1894 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1895 return ENOTTY;
1896 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1897 break;
1898 #endif
1899
1900 case DIOCCACHESYNC:
1901 return rf_sync_component_caches(raidPtr);
1902
1903 case DIOCGSTRATEGY:
1904 {
1905 struct disk_strategy *dks = (void *)data;
1906
1907 s = splbio();
1908 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1909 sizeof(dks->dks_name));
1910 splx(s);
1911 dks->dks_paramlen = 0;
1912
1913 return 0;
1914 }
1915
1916 case DIOCSSTRATEGY:
1917 {
1918 struct disk_strategy *dks = (void *)data;
1919 struct bufq_state *new;
1920 struct bufq_state *old;
1921
1922 if (dks->dks_param != NULL) {
1923 return EINVAL;
1924 }
1925 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1926 error = bufq_alloc(&new, dks->dks_name,
1927 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1928 if (error) {
1929 return error;
1930 }
1931 s = splbio();
1932 old = rs->buf_queue;
1933 bufq_move(new, old);
1934 rs->buf_queue = new;
1935 splx(s);
1936 bufq_free(old);
1937
1938 return 0;
1939 }
1940
1941 default:
1942 retcode = ENOTTY;
1943 }
1944 return (retcode);
1945
1946 }
1947
1948
1949 /* raidinit -- complete the rest of the initialization for the
1950 RAIDframe device. */
1951
1952
/*
 * Complete device-level initialization for a freshly configured set:
 * attach a pseudo-device instance, attach the disk(9) structure and
 * discover wedges.  On attach failure RAIDF_INITED is cleared again.
 * Called from the configure path with the RAID descriptor already set up.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	/* cfdata is heap-allocated; freed on attach failure here or on
	 * successful config_detach() in the shutdown path. */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		/* roll back: device is not usable without the attach. */
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	rf_set_geometry(rs, raidPtr);

	/* scan for GPT/wedge partitions on the new device. */
	dkwedge_discover(&rs->sc_dkdev);

}
2003 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2004 /* wake up the daemon & tell it to get us a spare table
2005 * XXX
2006 * the entries in the queues should be tagged with the raidPtr
2007 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
2009 * XXX
2010 *
2011 * XXX This code is not currently used. GO
2012 */
/*
 * Hand the request to the userland sparetable daemon (via the wait
 * queue) and block until a response appears on the response queue.
 * Returns the daemon's status from the "fcol" field of the response.
 * The response entry was allocated by the responder and is freed here.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2 drops the mutex while sleeping and re-takes it
	 * before returning, so re-check the queue each wakeup. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2036 #endif
2037
2038 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2039 * bp & passes it down.
2040 * any calls originating in the kernel must use non-blocking I/O
2041 * do some extra sanity checking to return "appropriate" error values for
2042 * certain conditions (to make some standard utilities work)
2043 *
2044 * Formerly known as: rf_DoAccessKernel
2045 */
/*
 * Drain the softc's buffer queue, launching one non-blocking RAIDframe
 * access per buf until either the queue is empty or the device's
 * concurrent-I/O budget (raidPtr->openings) is exhausted.  Bufs that
 * fail validation (out of range, not sector-aligned) are completed
 * immediately with an error via biodone().  Returns with the mutex
 * released.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock: label update does its own locking. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* loop invariant: mutex held at the top of each iteration. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* the extra (sum < x) comparisons catch arithmetic wrap. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening; released again on I/O completion. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2163
2164
2165
2166
2167 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2168
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Translate the RAIDframe request type into a buf transfer flag. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember which queue this request came from so the completion
	 * handler (KernelWakeupFunc) can find its way back. */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately via the normal I/O
		 * completion path; no device access is performed. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* Start timing how long this request sits at the device. */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Prepare the buf to transfer req->numSector sectors
		 * starting at req->sectorOffset on this component. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* Always reports success; actual I/O errors are delivered
	 * asynchronously through KernelWakeupFunc. */
	return (0);
}
/* This is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by
	 * rf_DispatchKernelIO()/InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* iodone_lock protects both the iodone list and the component
	 * failure accounting below. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* Charge the elapsed device time to this request's trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2311
2312
2313 /*
2314 * initialize a buf structure for doing an I/O in the kernel.
2315 */
2316 static void
2317 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2318 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2319 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2320 struct proc *b_proc)
2321 {
2322 /* bp->b_flags = B_PHYS | rw_flag; */
2323 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2324 bp->b_oflags = 0;
2325 bp->b_cflags = 0;
2326 bp->b_bcount = numSect << logBytesPerSector;
2327 bp->b_bufsize = bp->b_bcount;
2328 bp->b_error = 0;
2329 bp->b_dev = dev;
2330 bp->b_data = bf;
2331 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2332 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2333 if (bp->b_bcount == 0) {
2334 panic("bp->b_bcount is zero in InitBP!!");
2335 }
2336 bp->b_proc = b_proc;
2337 bp->b_iodone = cbFunc;
2338 bp->b_private = cbArg;
2339 }
2340
2341 static void
2342 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2343 struct disklabel *lp)
2344 {
2345 memset(lp, 0, sizeof(*lp));
2346
2347 /* fabricate a label... */
2348 if (raidPtr->totalSectors > UINT32_MAX)
2349 lp->d_secperunit = UINT32_MAX;
2350 else
2351 lp->d_secperunit = raidPtr->totalSectors;
2352 lp->d_secsize = raidPtr->bytesPerSector;
2353 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2354 lp->d_ntracks = 4 * raidPtr->numCol;
2355 lp->d_ncylinders = raidPtr->totalSectors /
2356 (lp->d_nsectors * lp->d_ntracks);
2357 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2358
2359 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2360 lp->d_type = DKTYPE_RAID;
2361 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2362 lp->d_rpm = 3600;
2363 lp->d_interleave = 1;
2364 lp->d_flags = 0;
2365
2366 lp->d_partitions[RAW_PART].p_offset = 0;
2367 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2368 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2369 lp->d_npartitions = RAW_PART + 1;
2370
2371 lp->d_magic = DISKMAGIC;
2372 lp->d_magic2 = DISKMAGIC;
2373 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2374
2375 }
2376 /*
2377 * Read the disklabel from the raid device. If one is not present, fake one
2378 * up.
2379 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	/* No such unit: nothing to do. */
	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Start from a fabricated default in case no on-disk label
	 * exists or the read fails. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX, so: below the
		 * clamp require an exact match, at the clamp only require
		 * that the label does not claim more than the array has. */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		/* Warn about any partition extending past the array. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}
	}

}
2442 /*
2443 * Take care of things one might want to take care of in the event
2444 * that a disklabel isn't present.
2445 */
2446 static void
2447 raidmakedisklabel(struct raid_softc *rs)
2448 {
2449 struct disklabel *lp = rs->sc_dkdev.dk_label;
2450 db1_printf(("Making a label..\n"));
2451
2452 /*
2453 * For historical reasons, if there's no disklabel present
2454 * the raw partition must be marked FS_BSDFFS.
2455 */
2456
2457 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2458
2459 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2460
2461 lp->d_checksum = dkcksum(lp);
2462 }
2463 /*
2464 * Wait interruptibly for an exclusive lock.
2465 *
2466 * XXX
2467 * Several drivers do this; it should be abstracted and made MP-safe.
2468 * (Hmm... where have we seen this warning before :-> GO )
2469 */
2470 static int
2471 raidlock(struct raid_softc *rs)
2472 {
2473 int error;
2474
2475 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2476 rs->sc_flags |= RAIDF_WANTED;
2477 if ((error =
2478 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2479 return (error);
2480 }
2481 rs->sc_flags |= RAIDF_LOCKED;
2482 return (0);
2483 }
2484 /*
2485 * Unlock and wake up any waiters.
2486 */
2487 static void
2488 raidunlock(struct raid_softc *rs)
2489 {
2490
2491 rs->sc_flags &= ~RAIDF_LOCKED;
2492 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2493 rs->sc_flags &= ~RAIDF_WANTED;
2494 wakeup(rs);
2495 }
2496 }
2497
2498
2499 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2500 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2501 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2502
2503 static daddr_t
2504 rf_component_info_offset(void)
2505 {
2506
2507 return RF_COMPONENT_INFO_OFFSET;
2508 }
2509
2510 static daddr_t
2511 rf_component_info_size(unsigned secsize)
2512 {
2513 daddr_t info_size;
2514
2515 KASSERT(secsize);
2516 if (secsize > RF_COMPONENT_INFO_SIZE)
2517 info_size = secsize;
2518 else
2519 info_size = RF_COMPONENT_INFO_SIZE;
2520
2521 return info_size;
2522 }
2523
2524 static daddr_t
2525 rf_parity_map_offset(RF_Raid_t *raidPtr)
2526 {
2527 daddr_t map_offset;
2528
2529 KASSERT(raidPtr->bytesPerSector);
2530 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2531 map_offset = raidPtr->bytesPerSector;
2532 else
2533 map_offset = RF_COMPONENT_INFO_SIZE;
2534 map_offset += rf_component_info_offset();
2535
2536 return map_offset;
2537 }
2538
2539 static daddr_t
2540 rf_parity_map_size(RF_Raid_t *raidPtr)
2541 {
2542 daddr_t map_size;
2543
2544 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2545 map_size = raidPtr->bytesPerSector;
2546 else
2547 map_size = RF_PARITY_MAP_SIZE;
2548
2549 return map_size;
2550 }
2551
2552 int
2553 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2554 {
2555 RF_ComponentLabel_t *clabel;
2556
2557 clabel = raidget_component_label(raidPtr, col);
2558 clabel->clean = RF_RAID_CLEAN;
2559 raidflush_component_label(raidPtr, col);
2560 return(0);
2561 }
2562
2563
2564 int
2565 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2566 {
2567 RF_ComponentLabel_t *clabel;
2568
2569 clabel = raidget_component_label(raidPtr, col);
2570 clabel->clean = RF_RAID_DIRTY;
2571 raidflush_component_label(raidPtr, col);
2572 return(0);
2573 }
2574
2575 int
2576 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2577 {
2578 KASSERT(raidPtr->bytesPerSector);
2579 return raidread_component_label(raidPtr->bytesPerSector,
2580 raidPtr->Disks[col].dev,
2581 raidPtr->raid_cinfo[col].ci_vp,
2582 &raidPtr->raid_cinfo[col].ci_label);
2583 }
2584
2585 RF_ComponentLabel_t *
2586 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2587 {
2588 return &raidPtr->raid_cinfo[col].ci_label;
2589 }
2590
2591 int
2592 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2593 {
2594 RF_ComponentLabel_t *label;
2595
2596 label = &raidPtr->raid_cinfo[col].ci_label;
2597 label->mod_counter = raidPtr->mod_counter;
2598 #ifndef RF_NO_PARITY_MAP
2599 label->parity_map_modcount = label->mod_counter;
2600 #endif
2601 return raidwrite_component_label(raidPtr->bytesPerSector,
2602 raidPtr->Disks[col].dev,
2603 raidPtr->raid_cinfo[col].ci_vp, label);
2604 }
2605
2606
2607 static int
2608 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2609 RF_ComponentLabel_t *clabel)
2610 {
2611 return raidread_component_area(dev, b_vp, clabel,
2612 sizeof(RF_ComponentLabel_t),
2613 rf_component_info_offset(),
2614 rf_component_info_size(secsize));
2615 }
2616
2617 /* ARGSUSED */
2618 static int
2619 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2620 size_t msize, daddr_t offset, daddr_t dsize)
2621 {
2622 struct buf *bp;
2623 const struct bdevsw *bdev;
2624 int error;
2625
2626 /* XXX should probably ensure that we don't try to do this if
2627 someone has changed rf_protected_sectors. */
2628
2629 if (b_vp == NULL) {
2630 /* For whatever reason, this component is not valid.
2631 Don't try to read a component label from it. */
2632 return(EINVAL);
2633 }
2634
2635 /* get a block of the appropriate size... */
2636 bp = geteblk((int)dsize);
2637 bp->b_dev = dev;
2638
2639 /* get our ducks in a row for the read */
2640 bp->b_blkno = offset / DEV_BSIZE;
2641 bp->b_bcount = dsize;
2642 bp->b_flags |= B_READ;
2643 bp->b_resid = dsize;
2644
2645 bdev = bdevsw_lookup(bp->b_dev);
2646 if (bdev == NULL)
2647 return (ENXIO);
2648 (*bdev->d_strategy)(bp);
2649
2650 error = biowait(bp);
2651
2652 if (!error) {
2653 memcpy(data, bp->b_data, msize);
2654 }
2655
2656 brelse(bp, 0);
2657 return(error);
2658 }
2659
2660
2661 static int
2662 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2663 RF_ComponentLabel_t *clabel)
2664 {
2665 return raidwrite_component_area(dev, b_vp, clabel,
2666 sizeof(RF_ComponentLabel_t),
2667 rf_component_info_offset(),
2668 rf_component_info_size(secsize), 0);
2669 }
2670
2671 /* ARGSUSED */
2672 static int
2673 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2674 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2675 {
2676 struct buf *bp;
2677 const struct bdevsw *bdev;
2678 int error;
2679
2680 /* get a block of the appropriate size... */
2681 bp = geteblk((int)dsize);
2682 bp->b_dev = dev;
2683
2684 /* get our ducks in a row for the write */
2685 bp->b_blkno = offset / DEV_BSIZE;
2686 bp->b_bcount = dsize;
2687 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2688 bp->b_resid = dsize;
2689
2690 memset(bp->b_data, 0, dsize);
2691 memcpy(bp->b_data, data, msize);
2692
2693 bdev = bdevsw_lookup(bp->b_dev);
2694 if (bdev == NULL)
2695 return (ENXIO);
2696 (*bdev->d_strategy)(bp);
2697 if (asyncp)
2698 return 0;
2699 error = biowait(bp);
2700 brelse(bp, 0);
2701 if (error) {
2702 #if 1
2703 printf("Failed to write RAID component info!\n");
2704 #endif
2705 }
2706
2707 return(error);
2708 }
2709
2710 void
2711 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2712 {
2713 int c;
2714
2715 for (c = 0; c < raidPtr->numCol; c++) {
2716 /* Skip dead disks. */
2717 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2718 continue;
2719 /* XXXjld: what if an error occurs here? */
2720 raidwrite_component_area(raidPtr->Disks[c].dev,
2721 raidPtr->raid_cinfo[c].ci_vp, map,
2722 RF_PARITYMAP_NBYTE,
2723 rf_parity_map_offset(raidPtr),
2724 rf_parity_map_size(raidPtr), 0);
2725 }
2726 }
2727
2728 void
2729 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2730 {
2731 struct rf_paritymap_ondisk tmp;
2732 int c,first;
2733
2734 first=1;
2735 for (c = 0; c < raidPtr->numCol; c++) {
2736 /* Skip dead disks. */
2737 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2738 continue;
2739 raidread_component_area(raidPtr->Disks[c].dev,
2740 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2741 RF_PARITYMAP_NBYTE,
2742 rf_parity_map_offset(raidPtr),
2743 rf_parity_map_size(raidPtr));
2744 if (first) {
2745 memcpy(map, &tmp, sizeof(*map));
2746 first = 0;
2747 } else {
2748 rf_paritymap_merge(map, &tmp);
2749 }
2750 }
2751 }
2752
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* Bump the mod counter so these label writes supersede any
	 * older on-disk labels. */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			 * NOTE(review): scol is not reset between spares;
			 * if no column references this spare, the value
			 * left from a previous iteration (or -1) ends up
			 * in clabel->column -- confirm this is intended. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2812
2813
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* Bump the mod counter so these label writes supersede any
	 * older on-disk labels. */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			/* Only set the clean bit on a final update, and
			 * only if parity is known good. */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			 * NOTE(review): scol keeps its previous value if
			 * no column references this spare -- confirm. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2888
2889 void
2890 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2891 {
2892
2893 if (vp != NULL) {
2894 if (auto_configured == 1) {
2895 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2896 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2897 vput(vp);
2898
2899 } else {
2900 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2901 }
2902 }
2903 }
2904
2905
2906 void
2907 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2908 {
2909 int r,c;
2910 struct vnode *vp;
2911 int acd;
2912
2913
2914 /* We take this opportunity to close the vnodes like we should.. */
2915
2916 for (c = 0; c < raidPtr->numCol; c++) {
2917 vp = raidPtr->raid_cinfo[c].ci_vp;
2918 acd = raidPtr->Disks[c].auto_configured;
2919 rf_close_component(raidPtr, vp, acd);
2920 raidPtr->raid_cinfo[c].ci_vp = NULL;
2921 raidPtr->Disks[c].auto_configured = 0;
2922 }
2923
2924 for (r = 0; r < raidPtr->numSpare; r++) {
2925 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2926 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2927 rf_close_component(raidPtr, vp, acd);
2928 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2929 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2930 }
2931 }
2932
2933
2934 void
2935 rf_ReconThread(struct rf_recon_req *req)
2936 {
2937 int s;
2938 RF_Raid_t *raidPtr;
2939
2940 s = splbio();
2941 raidPtr = (RF_Raid_t *) req->raidPtr;
2942 raidPtr->recon_in_progress = 1;
2943
2944 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2945 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2946
2947 RF_Free(req, sizeof(*req));
2948
2949 raidPtr->recon_in_progress = 0;
2950 splx(s);
2951
2952 /* That's all... */
2953 kthread_exit(0); /* does not return */
2954 }
2955
2956 void
2957 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2958 {
2959 int retcode;
2960 int s;
2961
2962 raidPtr->parity_rewrite_stripes_done = 0;
2963 raidPtr->parity_rewrite_in_progress = 1;
2964 s = splbio();
2965 retcode = rf_RewriteParity(raidPtr);
2966 splx(s);
2967 if (retcode) {
2968 printf("raid%d: Error re-writing parity (%d)!\n",
2969 raidPtr->raidid, retcode);
2970 } else {
2971 /* set the clean bit! If we shutdown correctly,
2972 the clean bit on each component label will get
2973 set */
2974 raidPtr->parity_good = RF_RAID_CLEAN;
2975 }
2976 raidPtr->parity_rewrite_in_progress = 0;
2977
2978 /* Anyone waiting for us to stop? If so, inform them... */
2979 if (raidPtr->waitShutdown) {
2980 wakeup(&raidPtr->parity_rewrite_in_progress);
2981 }
2982
2983 /* That's all... */
2984 kthread_exit(0); /* does not return */
2985 }
2986
2987
2988 void
2989 rf_CopybackThread(RF_Raid_t *raidPtr)
2990 {
2991 int s;
2992
2993 raidPtr->copyback_in_progress = 1;
2994 s = splbio();
2995 rf_CopybackReconstructedData(raidPtr);
2996 splx(s);
2997 raidPtr->copyback_in_progress = 0;
2998
2999 /* That's all... */
3000 kthread_exit(0); /* does not return */
3001 }
3002
3003
3004 void
3005 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3006 {
3007 int s;
3008 RF_Raid_t *raidPtr;
3009
3010 s = splbio();
3011 raidPtr = req->raidPtr;
3012 raidPtr->recon_in_progress = 1;
3013 rf_ReconstructInPlace(raidPtr, req->col);
3014 RF_Free(req, sizeof(*req));
3015 raidPtr->recon_in_progress = 0;
3016 splx(s);
3017
3018 /* That's all... */
3019 kthread_exit(0); /* does not return */
3020 }
3021
/*
 * Try to read a component label from (dev, vp).  If the label is
 * reasonable and fits within `size', prepend a new RF_AutoConfig_t
 * to ac_list (taking ownership of vp); otherwise free the label and
 * close/release vp.  Returns the (possibly updated) list head, or
 * NULL after freeing the whole list on allocation failure.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything accumulated so
		   far, labels and list nodes both. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list node now owns the vnode */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no list node took ownership, so free the
		   label and drop the vnode here. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3079
/*
 * Scan every disk-class device in the system for RAIDframe component
 * labels: wedges typed DKW_PTYPE_RAIDFRAME, disklabel partitions of
 * type FS_RAID, and (failing both) the raw partition.  Returns a
 * linked list of RF_AutoConfig_t candidates (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges (dk) have no partition letter in their dev_t. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedge path: accept only wedges whose partition
			   type is RAIDframe, then hand the open vnode to
			   rf_get_component (which takes ownership). */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		/* Check each FS_RAID partition; each gets its own vnode,
		   which rf_get_component consumes. */
		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3261
3262
3263 int
3264 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3265 {
3266
3267 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3268 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3269 ((clabel->clean == RF_RAID_CLEAN) ||
3270 (clabel->clean == RF_RAID_DIRTY)) &&
3271 clabel->row >=0 &&
3272 clabel->column >= 0 &&
3273 clabel->num_rows > 0 &&
3274 clabel->num_columns > 0 &&
3275 clabel->row < clabel->num_rows &&
3276 clabel->column < clabel->num_columns &&
3277 clabel->blockSize > 0 &&
3278 /*
3279 * numBlocksHi may contain garbage, but it is ok since
3280 * the type is unsigned. If it is really garbage,
3281 * rf_fix_old_label_size() will fix it.
3282 */
3283 rf_component_label_numblocks(clabel) > 0) {
3284 /*
3285 * label looks reasonable enough...
3286 * let's make sure it has no old garbage.
3287 */
3288 if (numsecs)
3289 rf_fix_old_label_size(clabel, numsecs);
3290 return(1);
3291 }
3292 return(0);
3293 }
3294
3295
3296 /*
3297 * For reasons yet unknown, some old component labels have garbage in
3298 * the newer numBlocksHi region, and this causes lossage. Since those
3299 * disks will also have numsecs set to less than 32 bits of sectors,
3300 * we can determine when this corruption has occurred, and fix it.
3301 *
3302 * The exact same problem, with the same unknown reason, happens to
3303 * the partitionSizeHi member as well.
3304 */
3305 static void
3306 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3307 {
3308
3309 if (numsecs < ((uint64_t)1 << 32)) {
3310 if (clabel->numBlocksHi) {
3311 printf("WARNING: total sectors < 32 bits, yet "
3312 "numBlocksHi set\n"
3313 "WARNING: resetting numBlocksHi to zero.\n");
3314 clabel->numBlocksHi = 0;
3315 }
3316
3317 if (clabel->partitionSizeHi) {
3318 printf("WARNING: total sectors < 32 bits, yet "
3319 "partitionSizeHi set\n"
3320 "WARNING: resetting partitionSizeHi to zero.\n");
3321 clabel->partitionSizeHi = 0;
3322 }
3323 }
3324 }
3325
3326
3327 #ifdef DEBUG
/* Dump the contents of a component label to the console (DEBUG only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Index is clabel->root_partition masked to two bits below. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
3359 #endif
3360
3361 RF_ConfigSet_t *
3362 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3363 {
3364 RF_AutoConfig_t *ac;
3365 RF_ConfigSet_t *config_sets;
3366 RF_ConfigSet_t *cset;
3367 RF_AutoConfig_t *ac_next;
3368
3369
3370 config_sets = NULL;
3371
3372 /* Go through the AutoConfig list, and figure out which components
3373 belong to what sets. */
3374 ac = ac_list;
3375 while(ac!=NULL) {
3376 /* we're going to putz with ac->next, so save it here
3377 for use at the end of the loop */
3378 ac_next = ac->next;
3379
3380 if (config_sets == NULL) {
3381 /* will need at least this one... */
3382 config_sets = (RF_ConfigSet_t *)
3383 malloc(sizeof(RF_ConfigSet_t),
3384 M_RAIDFRAME, M_NOWAIT);
3385 if (config_sets == NULL) {
3386 panic("rf_create_auto_sets: No memory!");
3387 }
3388 /* this one is easy :) */
3389 config_sets->ac = ac;
3390 config_sets->next = NULL;
3391 config_sets->rootable = 0;
3392 ac->next = NULL;
3393 } else {
3394 /* which set does this component fit into? */
3395 cset = config_sets;
3396 while(cset!=NULL) {
3397 if (rf_does_it_fit(cset, ac)) {
3398 /* looks like it matches... */
3399 ac->next = cset->ac;
3400 cset->ac = ac;
3401 break;
3402 }
3403 cset = cset->next;
3404 }
3405 if (cset==NULL) {
3406 /* didn't find a match above... new set..*/
3407 cset = (RF_ConfigSet_t *)
3408 malloc(sizeof(RF_ConfigSet_t),
3409 M_RAIDFRAME, M_NOWAIT);
3410 if (cset == NULL) {
3411 panic("rf_create_auto_sets: No memory!");
3412 }
3413 cset->ac = ac;
3414 ac->next = NULL;
3415 cset->next = config_sets;
3416 cset->rootable = 0;
3417 config_sets = cset;
3418 }
3419 }
3420 ac = ac_next;
3421 }
3422
3423
3424 return(config_sets);
3425 }
3426
3427 static int
3428 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3429 {
3430 RF_ComponentLabel_t *clabel1, *clabel2;
3431
3432 /* If this one matches the *first* one in the set, that's good
3433 enough, since the other members of the set would have been
3434 through here too... */
3435 /* note that we are not checking partitionSize here..
3436
3437 Note that we are also not checking the mod_counters here.
3438 If everything else matches except the mod_counter, that's
3439 good enough for this test. We will deal with the mod_counters
3440 a little later in the autoconfiguration process.
3441
3442 (clabel1->mod_counter == clabel2->mod_counter) &&
3443
3444 The reason we don't check for this is that failed disks
3445 will have lower modification counts. If those disks are
3446 not added to the set they used to belong to, then they will
3447 form their own set, which may result in 2 different sets,
3448 for example, competing to be configured at raid0, and
3449 perhaps competing to be the root filesystem set. If the
3450 wrong ones get configured, or both attempt to become /,
3451 weird behaviour and or serious lossage will occur. Thus we
3452 need to bring them into the fold here, and kick them out at
3453 a later point.
3454
3455 */
3456
3457 clabel1 = cset->ac->clabel;
3458 clabel2 = ac->clabel;
3459 if ((clabel1->version == clabel2->version) &&
3460 (clabel1->serial_number == clabel2->serial_number) &&
3461 (clabel1->num_rows == clabel2->num_rows) &&
3462 (clabel1->num_columns == clabel2->num_columns) &&
3463 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3464 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3465 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3466 (clabel1->parityConfig == clabel2->parityConfig) &&
3467 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3468 (clabel1->blockSize == clabel2->blockSize) &&
3469 rf_component_label_numblocks(clabel1) ==
3470 rf_component_label_numblocks(clabel2) &&
3471 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3472 (clabel1->root_partition == clabel2->root_partition) &&
3473 (clabel1->last_unit == clabel2->last_unit) &&
3474 (clabel1->config_order == clabel2->config_order)) {
3475 /* if it get's here, it almost *has* to be a match */
3476 } else {
3477 /* it's not consistent with somebody in the set..
3478 punt */
3479 return(0);
3480 }
3481 /* all was fine.. it must fit... */
3482 return(1);
3483 }
3484
/*
 * Check that configuration set 'cset' has enough live components to
 * be brought up.  Returns 1 if the set is configurable, 0 if too many
 * components are missing for the set's RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The highest counter seen wins; components carrying a lower
	   counter are stale and are not counted as present below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			/* first component seeds the value */
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for every column, look for an up-to-date component */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4 and RAID 5
	   tolerate at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3587
/*
 * Build a RAIDframe configuration structure for an autoconfigured
 * set.  'ac' heads the list of components; the shared geometry is
 * taken from the first component's label.
 *
 * NOTE(review): 'raidPtr' is not referenced in this function --
 * presumably kept for interface symmetry; confirm before removing.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
	RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;	/* rows are forced to 1 */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Record each component's device name at its column slot. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables are passed along for autoconfig. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3622
3623 int
3624 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3625 {
3626 RF_ComponentLabel_t *clabel;
3627 int column;
3628 int sparecol;
3629
3630 raidPtr->autoconfigure = new_value;
3631
3632 for(column=0; column<raidPtr->numCol; column++) {
3633 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3634 clabel = raidget_component_label(raidPtr, column);
3635 clabel->autoconfigure = new_value;
3636 raidflush_component_label(raidPtr, column);
3637 }
3638 }
3639 for(column = 0; column < raidPtr->numSpare ; column++) {
3640 sparecol = raidPtr->numCol + column;
3641 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3642 clabel = raidget_component_label(raidPtr, sparecol);
3643 clabel->autoconfigure = new_value;
3644 raidflush_component_label(raidPtr, sparecol);
3645 }
3646 }
3647 return(new_value);
3648 }
3649
3650 int
3651 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3652 {
3653 RF_ComponentLabel_t *clabel;
3654 int column;
3655 int sparecol;
3656
3657 raidPtr->root_partition = new_value;
3658 for(column=0; column<raidPtr->numCol; column++) {
3659 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3660 clabel = raidget_component_label(raidPtr, column);
3661 clabel->root_partition = new_value;
3662 raidflush_component_label(raidPtr, column);
3663 }
3664 }
3665 for(column = 0; column < raidPtr->numSpare ; column++) {
3666 sparecol = raidPtr->numCol + column;
3667 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3668 clabel = raidget_component_label(raidPtr, sparecol);
3669 clabel->root_partition = new_value;
3670 raidflush_component_label(raidPtr, sparecol);
3671 }
3672 }
3673 return(new_value);
3674 }
3675
3676 void
3677 rf_release_all_vps(RF_ConfigSet_t *cset)
3678 {
3679 RF_AutoConfig_t *ac;
3680
3681 ac = cset->ac;
3682 while(ac!=NULL) {
3683 /* Close the vp, and give it back */
3684 if (ac->vp) {
3685 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3686 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3687 vput(ac->vp);
3688 ac->vp = NULL;
3689 }
3690 ac = ac->next;
3691 }
3692 }
3693
3694
3695 void
3696 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3697 {
3698 RF_AutoConfig_t *ac;
3699 RF_AutoConfig_t *next_ac;
3700
3701 ac = cset->ac;
3702 while(ac!=NULL) {
3703 next_ac = ac->next;
3704 /* nuke the label */
3705 free(ac->clabel, M_RAIDFRAME);
3706 /* cleanup the config structure */
3707 free(ac, M_RAIDFRAME);
3708 /* "next.." */
3709 ac = next_ac;
3710 }
3711 /* and, finally, nuke the config set */
3712 free(cset, M_RAIDFRAME);
3713 }
3714
3715
/*
 * Initialize a component label from the current state of the RAID
 * set.  Per-component fields (e.g. row/column) are not filled in
 * here; callers set those before writing the label out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* geometry of the set */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the lo/hi label fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3748
/*
 * Configure a RAID set from an autoconfiguration set: build a config
 * structure from the component labels, find a free raid unit
 * (preferring the unit the set was last configured at), and configure
 * it.  Returns the softc of the configured device, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk upward from the preferred unit until an unconfigured one
	   is found; NOTE(review): this assumes raidget() never returns
	   NULL -- presumably it creates the softc on demand; confirm */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1: /* Force Root */
		case 2: /* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3822
3823 void
3824 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3825 {
3826 struct buf *bp;
3827 struct raid_softc *rs;
3828
3829 bp = (struct buf *)desc->bp;
3830 rs = desc->raidPtr->softc;
3831 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3832 (bp->b_flags & B_READ));
3833 }
3834
/*
 * Initialize a pool(9) of fixed-size items for RAIDframe: prime it
 * with 'xmin' preallocated items, keep at least 'xmin' around, and
 * cap idle items at 'xmax'.  'w_chan' is the wait-channel name.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3844
3845 /*
3846 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3847 * if there is IO pending and if that IO could possibly be done for a
3848 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3849 * otherwise.
3850 *
3851 */
3852
3853 int
3854 rf_buf_queue_check(RF_Raid_t *raidPtr)
3855 {
3856 struct raid_softc *rs = raidPtr->softc;
3857 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3858 /* there is work to do */
3859 return 0;
3860 }
3861 /* default is nothing to do */
3862 return 1;
3863 }
3864
3865 int
3866 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3867 {
3868 uint64_t numsecs;
3869 unsigned secsize;
3870 int error;
3871
3872 error = getdisksize(vp, &numsecs, &secsize);
3873 if (error == 0) {
3874 diskPtr->blockSize = secsize;
3875 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3876 diskPtr->partitionSize = numsecs;
3877 return 0;
3878 }
3879 return error;
3880 }
3881
/*
 * Autoconfiguration match function: raid(4) pseudo-devices always
 * match, so unconditionally claim the device.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3887
/*
 * Autoconfiguration attach function.  Real initialization happens
 * when a set is configured, so there is nothing to do here.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3893
3894
3895 static int
3896 raid_detach(device_t self, int flags)
3897 {
3898 int error;
3899 struct raid_softc *rs = raidget(device_unit(self));
3900
3901 if (rs == NULL)
3902 return ENXIO;
3903
3904 if ((error = raidlock(rs)) != 0)
3905 return (error);
3906
3907 error = raid_detach_unlocked(rs);
3908
3909 raidunlock(rs);
3910
3911 /* XXXkd: raidput(rs) ??? */
3912
3913 return error;
3914 }
3915
/*
 * Publish the (synthetic) geometry of the RAID set to the disk(9)
 * layer.  Only fields with a meaningful equivalent are filled in; the
 * track count is a made-up value derived from the column count.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;	/* no physical meaning */

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3930
3931 /*
3932 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3933 * We end up returning whatever error was returned by the first cache flush
3934 * that fails.
3935 */
3936
3937 int
3938 rf_sync_component_caches(RF_Raid_t *raidPtr)
3939 {
3940 int c, sparecol;
3941 int e,error;
3942 int force = 1;
3943
3944 error = 0;
3945 for (c = 0; c < raidPtr->numCol; c++) {
3946 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3947 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3948 &force, FWRITE, NOCRED);
3949 if (e) {
3950 if (e != ENODEV)
3951 printf("raid%d: cache flush to component %s failed.\n",
3952 raidPtr->raidid, raidPtr->Disks[c].devname);
3953 if (error == 0) {
3954 error = e;
3955 }
3956 }
3957 }
3958 }
3959
3960 for( c = 0; c < raidPtr->numSpare ; c++) {
3961 sparecol = raidPtr->numCol + c;
3962 /* Need to ensure that the reconstruct actually completed! */
3963 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3964 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3965 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3966 if (e) {
3967 if (e != ENODEV)
3968 printf("raid%d: cache flush to component %s failed.\n",
3969 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3970 if (error == 0) {
3971 error = e;
3972 }
3973 }
3974 }
3975 }
3976 return error;
3977 }
3978