rf_netbsdkintf.c revision 1.310 1 /* $NetBSD: rf_netbsdkintf.c,v 1.310 2014/05/12 15:53:01 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.310 2014/05/12 15:53:01 christos Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/* Block-device switch: entry points for the raid block device nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_flag = D_DISK
};
216
/*
 * Character-device switch: raw I/O goes through raidread/raidwrite
 * (physio); tty/poll/mmap/kqfilter entries are the standard no-ops.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK
};
230
/* disk(9) attachment glue: the strategy routine and the transfer-size clamp. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
232
/*
 * Per-unit software state for one RAIDframe device (raidN).  Units
 * live on the global 'raids' list, protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_unit;	/* unit number (the N in raidN) */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe engine state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global raids list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
252
253 #define raidunit(x) DISKUNIT(x)
254
255 extern struct cfdriver raid_cd;
256 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
257 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
258 DVF_DETACH_SHUTDOWN);
259
260 /*
261 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
262 * Be aware that large numbers can allow the driver to consume a lot of
263 * kernel memory, especially on writes, and in degraded mode reads.
264 *
265 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
266 * a single 64K write will typically require 64K for the old data,
267 * 64K for the old parity, and 64K for the new parity, for a total
268 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
270 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
271 *
272 * Now in degraded mode, for example, a 64K read on the above setup may
273 * require data reconstruction, which will require *all* of the 4 remaining
274 * disks to participate -- 4 * 32K/disk == 128K again.
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 6
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285
286 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
287 struct disklabel *);
288 static void raidgetdisklabel(dev_t);
289 static void raidmakedisklabel(struct raid_softc *);
290
291 static int raidlock(struct raid_softc *);
292 static void raidunlock(struct raid_softc *);
293
294 static int raid_detach_unlocked(struct raid_softc *);
295
296 static void rf_markalldirty(RF_Raid_t *);
297 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
298
299 void rf_ReconThread(struct rf_recon_req *);
300 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
301 void rf_CopybackThread(RF_Raid_t *raidPtr);
302 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
303 int rf_autoconfig(device_t);
304 void rf_buildroothack(RF_ConfigSet_t *);
305
306 RF_AutoConfig_t *rf_find_raid_components(void);
307 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
308 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
309 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
310 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
311 int rf_set_autoconfig(RF_Raid_t *, int);
312 int rf_set_rootpartition(RF_Raid_t *, int);
313 void rf_release_all_vps(RF_ConfigSet_t *);
314 void rf_cleanup_config_set(RF_ConfigSet_t *);
315 int rf_have_enough_components(RF_ConfigSet_t *);
316 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
317 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
318
319 /*
320 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
321 * Note that this is overridden by having RAID_AUTOCONFIG as an option
322 * in the kernel config file.
323 */
324 #ifdef RAID_AUTOCONFIG
325 int raidautoconfig = 1;
326 #else
327 int raidautoconfig = 0;
328 #endif
329 static bool raidautoconfigdone = false;
330
331 struct RF_Pools_s rf_pools;
332
333 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
334 static kmutex_t raid_lock;
335
336 static struct raid_softc *
337 raidcreate(int unit) {
338 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
339 if (sc == NULL) {
340 #ifdef DIAGNOSTIC
341 printf("%s: out of memory\n", __func__);
342 #endif
343 return NULL;
344 }
345 sc->sc_unit = unit;
346 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
347 return sc;
348 }
349
/*
 * Release a softc allocated by raidcreate(): free its buffer queue,
 * then the structure itself.  Callers (see raidput()) unlink it from
 * the global list first.
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
355
356 static struct raid_softc *
357 raidget(int unit) {
358 struct raid_softc *sc;
359 if (unit < 0) {
360 #ifdef DIAGNOSTIC
361 panic("%s: unit %d!", __func__, unit);
362 #endif
363 return NULL;
364 }
365 mutex_enter(&raid_lock);
366 LIST_FOREACH(sc, &raids, sc_link) {
367 if (sc->sc_unit == unit) {
368 mutex_exit(&raid_lock);
369 return sc;
370 }
371 }
372 mutex_exit(&raid_lock);
373 if ((sc = raidcreate(unit)) == NULL)
374 return NULL;
375 mutex_enter(&raid_lock);
376 LIST_INSERT_HEAD(&raids, sc, sc_link);
377 mutex_exit(&raid_lock);
378 return sc;
379 }
380
/*
 * Unlink a unit from the global raids list (under raid_lock) and
 * destroy it.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
388
/*
 * Pseudo-device attach routine: one-time RAIDframe initialization at
 * boot.  The 'num' argument is not used; units are created on demand
 * by raidget().
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Locks/queues used by the spare-table installation protocol. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
421
/*
 * Finalizer callback (registered in raidattach()): auto-configure any
 * RAID sets found on the system once all real hardware has attached.
 * Runs at most once.  Returns 1 when it did the scan, 0 when it was
 * disabled or already done, as config_finalize() expects.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
459
460 static int
461 rf_containsboot(RF_Raid_t *r, device_t bdv) {
462 const char *bootname = device_xname(bdv);
463 size_t len = strlen(bootname);
464
465 for (int col = 0; col < r->numCol; col++) {
466 const char *devname = r->Disks[col].devname;
467 devname += sizeof("/dev/") - 1;
468 if (strncmp(devname, "dk", 2) == 0) {
469 const char *parent =
470 dkwedge_get_parent_name(r->Disks[col].dev);
471 if (parent != NULL)
472 devname = parent;
473 }
474 if (strncmp(devname, bootname, len) == 0) {
475 struct raid_softc *sc = r->softc;
476 aprint_debug("raid%d includes boot device %s\n",
477 sc->sc_unit, devname);
478 return 1;
479 }
480 }
481 return 0;
482 }
483
/*
 * Walk the list of auto-configuration sets: configure each complete
 * set that has autoconfigure enabled, then try to determine the root
 * device.  A configured set marked rootable becomes a root candidate;
 * with exactly one candidate we may override booted_device, with
 * several we use the known boot device to disambiguate, and failing
 * that we fall back to asking the user (RB_ASKNAME).  All config sets
 * are released/cleaned up by this routine.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Override root if nothing else was booted, or if this
		 * set is forced root (root_partition == 1), or if the
		 * set contains the device we booted from.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Narrow the candidates to sets containing the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
594
595
596 int
597 raidsize(dev_t dev)
598 {
599 struct raid_softc *rs;
600 struct disklabel *lp;
601 int part, unit, omask, size;
602
603 unit = raidunit(dev);
604 if ((rs = raidget(unit)) == NULL)
605 return -1;
606 if ((rs->sc_flags & RAIDF_INITED) == 0)
607 return (-1);
608
609 part = DISKPART(dev);
610 omask = rs->sc_dkdev.dk_openmask & (1 << part);
611 lp = rs->sc_dkdev.dk_label;
612
613 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
614 return (-1);
615
616 if (lp->d_partitions[part].p_fstype != FS_SWAP)
617 size = -1;
618 else
619 size = lp->d_partitions[part].p_size *
620 (lp->d_secsize / DEV_BSIZE);
621
622 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
623 return (-1);
624
625 return (size);
626
627 }
628
/*
 * Crash-dump entry point.  Dumps are only supported to RAID 1 sets
 * (one data column, one parity column).  Selects a live component to
 * receive the dump — preferring the master, then a spare standing in
 * for the master, then the slave, then a spare standing in for the
 * slave — and forwards the request to that component's block device.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* The transfer must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* The dump must fit inside the raid device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/*
 * Open a partition of the raid device.  Fails with EBUSY while the
 * unit is shutting down, or when wedges exist and a non-raw partition
 * is requested.  On the first open of a configured set the disklabel
 * is (re)read and all components are marked dirty.  Note that the
 * success path falls through the 'bad:' label too — it just unlocks
 * and returns 'error' (0 on success).
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
849 /* ARGSUSED */
850 int
851 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
852 {
853 int unit = raidunit(dev);
854 struct raid_softc *rs;
855 int error = 0;
856 int part;
857
858 if ((rs = raidget(unit)) == NULL)
859 return ENXIO;
860
861 if ((error = raidlock(rs)) != 0)
862 return (error);
863
864 part = DISKPART(dev);
865
866 /* ...that much closer to allowing unconfiguration... */
867 switch (fmt) {
868 case S_IFCHR:
869 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
870 break;
871
872 case S_IFBLK:
873 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
874 break;
875 }
876 rs->sc_dkdev.dk_openmask =
877 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
878
879 if ((rs->sc_dkdev.dk_openmask == 0) &&
880 ((rs->sc_flags & RAIDF_INITED) != 0)) {
881 /* Last one... device is not unconfigured yet.
882 Device shutdown has taken care of setting the
883 clean bits if RAIDF_INITED is not set
884 mark things as clean... */
885
886 rf_update_component_labels(&rs->sc_r,
887 RF_FINAL_COMPONENT_UPDATE);
888
889 /* If the kernel is shutting down, it will detach
890 * this RAID set soon enough.
891 */
892 }
893
894 raidunlock(rs);
895 return (0);
896
897 }
898
/*
 * Block I/O strategy routine.  Validates the unit and the transfer,
 * applies bounds checking (against the media size for the raw
 * partition, against the disklabel otherwise), then queues the buffer
 * for processing and signals the I/O thread via iodone_cv.  Any early
 * failure completes the buffer immediately with biodone().
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* Convert totalSectors (array sectors) to DEV_BSIZE blocks. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* schedule the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
969 /* ARGSUSED */
970 int
971 raidread(dev_t dev, struct uio *uio, int flags)
972 {
973 int unit = raidunit(dev);
974 struct raid_softc *rs;
975
976 if ((rs = raidget(unit)) == NULL)
977 return ENXIO;
978
979 if ((rs->sc_flags & RAIDF_INITED) == 0)
980 return (ENXIO);
981
982 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
983
984 }
985 /* ARGSUSED */
986 int
987 raidwrite(dev_t dev, struct uio *uio, int flags)
988 {
989 int unit = raidunit(dev);
990 struct raid_softc *rs;
991
992 if ((rs = raidget(unit)) == NULL)
993 return ENXIO;
994
995 if ((rs->sc_flags & RAIDF_INITED) == 0)
996 return (ENXIO);
997
998 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
999
1000 }
1001
/*
 * Tear down a raid unit; the caller already holds the unit lock
 * (raidlock()).  Refuses with EBUSY while any partition is open.
 * Shuts down the RAIDframe engine if the unit was configured, then
 * deletes its wedges and detaches/destroys the disk(9) structure.
 * Returns 0 on success or an error from rf_Shutdown().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1033
1034 int
1035 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1036 {
1037 int unit = raidunit(dev);
1038 int error = 0;
1039 int part, pmask, s;
1040 cfdata_t cf;
1041 struct raid_softc *rs;
1042 RF_Config_t *k_cfg, *u_cfg;
1043 RF_Raid_t *raidPtr;
1044 RF_RaidDisk_t *diskPtr;
1045 RF_AccTotals_t *totals;
1046 RF_DeviceConfig_t *d_cfg, **ucfgp;
1047 u_char *specific_buf;
1048 int retcode = 0;
1049 int column;
1050 /* int raidid; */
1051 struct rf_recon_req *rrcopy, *rr;
1052 RF_ComponentLabel_t *clabel;
1053 RF_ComponentLabel_t *ci_label;
1054 RF_ComponentLabel_t **clabel_ptr;
1055 RF_SingleComponent_t *sparePtr,*componentPtr;
1056 RF_SingleComponent_t component;
1057 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1058 int i, j, d;
1059 #ifdef __HAVE_OLD_DISKLABEL
1060 struct disklabel newlabel;
1061 #endif
1062 struct dkwedge_info *dkw;
1063
1064 if ((rs = raidget(unit)) == NULL)
1065 return ENXIO;
1066 raidPtr = &rs->sc_r;
1067
1068 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1069 (int) DISKPART(dev), (int) unit, cmd));
1070
1071 /* Must be open for writes for these commands... */
1072 switch (cmd) {
1073 #ifdef DIOCGSECTORSIZE
1074 case DIOCGSECTORSIZE:
1075 *(u_int *)data = raidPtr->bytesPerSector;
1076 return 0;
1077 case DIOCGMEDIASIZE:
1078 *(off_t *)data =
1079 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1080 return 0;
1081 #endif
1082 case DIOCSDINFO:
1083 case DIOCWDINFO:
1084 #ifdef __HAVE_OLD_DISKLABEL
1085 case ODIOCWDINFO:
1086 case ODIOCSDINFO:
1087 #endif
1088 case DIOCWLABEL:
1089 case DIOCAWEDGE:
1090 case DIOCDWEDGE:
1091 case DIOCSSTRATEGY:
1092 if ((flag & FWRITE) == 0)
1093 return (EBADF);
1094 }
1095
1096 /* Must be initialized for these... */
1097 switch (cmd) {
1098 case DIOCGDINFO:
1099 case DIOCSDINFO:
1100 case DIOCWDINFO:
1101 #ifdef __HAVE_OLD_DISKLABEL
1102 case ODIOCGDINFO:
1103 case ODIOCWDINFO:
1104 case ODIOCSDINFO:
1105 case ODIOCGDEFLABEL:
1106 #endif
1107 case DIOCGPART:
1108 case DIOCWLABEL:
1109 case DIOCGDEFLABEL:
1110 case DIOCAWEDGE:
1111 case DIOCDWEDGE:
1112 case DIOCLWEDGES:
1113 case DIOCCACHESYNC:
1114 case RAIDFRAME_SHUTDOWN:
1115 case RAIDFRAME_REWRITEPARITY:
1116 case RAIDFRAME_GET_INFO:
1117 case RAIDFRAME_RESET_ACCTOTALS:
1118 case RAIDFRAME_GET_ACCTOTALS:
1119 case RAIDFRAME_KEEP_ACCTOTALS:
1120 case RAIDFRAME_GET_SIZE:
1121 case RAIDFRAME_FAIL_DISK:
1122 case RAIDFRAME_COPYBACK:
1123 case RAIDFRAME_CHECK_RECON_STATUS:
1124 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1125 case RAIDFRAME_GET_COMPONENT_LABEL:
1126 case RAIDFRAME_SET_COMPONENT_LABEL:
1127 case RAIDFRAME_ADD_HOT_SPARE:
1128 case RAIDFRAME_REMOVE_HOT_SPARE:
1129 case RAIDFRAME_INIT_LABELS:
1130 case RAIDFRAME_REBUILD_IN_PLACE:
1131 case RAIDFRAME_CHECK_PARITY:
1132 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1133 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1134 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1135 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1136 case RAIDFRAME_SET_AUTOCONFIG:
1137 case RAIDFRAME_SET_ROOT:
1138 case RAIDFRAME_DELETE_COMPONENT:
1139 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1140 case RAIDFRAME_PARITYMAP_STATUS:
1141 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1142 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1143 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1144 case DIOCGSTRATEGY:
1145 case DIOCSSTRATEGY:
1146 if ((rs->sc_flags & RAIDF_INITED) == 0)
1147 return (ENXIO);
1148 }
1149
1150 switch (cmd) {
1151 #ifdef COMPAT_50
1152 case RAIDFRAME_GET_INFO50:
1153 return rf_get_info50(raidPtr, data);
1154
1155 case RAIDFRAME_CONFIGURE50:
1156 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1157 return retcode;
1158 goto config;
1159 #endif
1160 /* configure the system */
1161 case RAIDFRAME_CONFIGURE:
1162
1163 if (raidPtr->valid) {
1164 /* There is a valid RAID set running on this unit! */
1165 printf("raid%d: Device already configured!\n",unit);
1166 return(EINVAL);
1167 }
1168
1169 /* copy-in the configuration information */
1170 /* data points to a pointer to the configuration structure */
1171
1172 u_cfg = *((RF_Config_t **) data);
1173 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1174 if (k_cfg == NULL) {
1175 return (ENOMEM);
1176 }
1177 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1178 if (retcode) {
1179 RF_Free(k_cfg, sizeof(RF_Config_t));
1180 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1181 retcode));
1182 return (retcode);
1183 }
1184 goto config;
1185 config:
1186 /* allocate a buffer for the layout-specific data, and copy it
1187 * in */
1188 if (k_cfg->layoutSpecificSize) {
1189 if (k_cfg->layoutSpecificSize > 10000) {
1190 /* sanity check */
1191 RF_Free(k_cfg, sizeof(RF_Config_t));
1192 return (EINVAL);
1193 }
1194 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1195 (u_char *));
1196 if (specific_buf == NULL) {
1197 RF_Free(k_cfg, sizeof(RF_Config_t));
1198 return (ENOMEM);
1199 }
1200 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1201 k_cfg->layoutSpecificSize);
1202 if (retcode) {
1203 RF_Free(k_cfg, sizeof(RF_Config_t));
1204 RF_Free(specific_buf,
1205 k_cfg->layoutSpecificSize);
1206 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1207 retcode));
1208 return (retcode);
1209 }
1210 } else
1211 specific_buf = NULL;
1212 k_cfg->layoutSpecific = specific_buf;
1213
1214 /* should do some kind of sanity check on the configuration.
1215 * Store the sum of all the bytes in the last byte? */
1216
1217 /* configure the system */
1218
1219 /*
1220 * Clear the entire RAID descriptor, just to make sure
1221 * there is no stale data left in the case of a
1222 * reconfiguration
1223 */
1224 memset(raidPtr, 0, sizeof(*raidPtr));
1225 raidPtr->softc = rs;
1226 raidPtr->raidid = unit;
1227
1228 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1229
1230 if (retcode == 0) {
1231
1232 /* allow this many simultaneous IO's to
1233 this RAID device */
1234 raidPtr->openings = RAIDOUTSTANDING;
1235
1236 raidinit(rs);
1237 rf_markalldirty(raidPtr);
1238 }
1239 /* free the buffers. No return code here. */
1240 if (k_cfg->layoutSpecificSize) {
1241 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1242 }
1243 RF_Free(k_cfg, sizeof(RF_Config_t));
1244
1245 return (retcode);
1246
1247 /* shutdown the system */
1248 case RAIDFRAME_SHUTDOWN:
1249
1250 part = DISKPART(dev);
1251 pmask = (1 << part);
1252
1253 if ((error = raidlock(rs)) != 0)
1254 return (error);
1255
1256 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1257 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1258 (rs->sc_dkdev.dk_copenmask & pmask)))
1259 retcode = EBUSY;
1260 else {
1261 rs->sc_flags |= RAIDF_SHUTDOWN;
1262 rs->sc_dkdev.dk_copenmask &= ~pmask;
1263 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1264 rs->sc_dkdev.dk_openmask &= ~pmask;
1265 retcode = 0;
1266 }
1267
1268 raidunlock(rs);
1269
1270 if (retcode != 0)
1271 return retcode;
1272
1273 /* free the pseudo device attach bits */
1274
1275 cf = device_cfdata(rs->sc_dev);
1276 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1277 free(cf, M_RAIDFRAME);
1278
1279 return (retcode);
1280 case RAIDFRAME_GET_COMPONENT_LABEL:
1281 clabel_ptr = (RF_ComponentLabel_t **) data;
1282 /* need to read the component label for the disk indicated
1283 by row,column in clabel */
1284
1285 /*
1286 * Perhaps there should be an option to skip the in-core
1287 * copy and hit the disk, as with disklabel(8).
1288 */
1289 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1290
1291 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1292
1293 if (retcode) {
1294 RF_Free(clabel, sizeof(*clabel));
1295 return retcode;
1296 }
1297
1298 clabel->row = 0; /* Don't allow looking at anything else.*/
1299
1300 column = clabel->column;
1301
1302 if ((column < 0) || (column >= raidPtr->numCol +
1303 raidPtr->numSpare)) {
1304 RF_Free(clabel, sizeof(*clabel));
1305 return EINVAL;
1306 }
1307
1308 RF_Free(clabel, sizeof(*clabel));
1309
1310 clabel = raidget_component_label(raidPtr, column);
1311
1312 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1313
1314 #if 0
1315 case RAIDFRAME_SET_COMPONENT_LABEL:
1316 clabel = (RF_ComponentLabel_t *) data;
1317
1318 /* XXX check the label for valid stuff... */
1319 /* Note that some things *should not* get modified --
1320 the user should be re-initing the labels instead of
1321 trying to patch things.
1322 */
1323
1324 raidid = raidPtr->raidid;
1325 #ifdef DEBUG
1326 printf("raid%d: Got component label:\n", raidid);
1327 printf("raid%d: Version: %d\n", raidid, clabel->version);
1328 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1329 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1330 printf("raid%d: Column: %d\n", raidid, clabel->column);
1331 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1332 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1333 printf("raid%d: Status: %d\n", raidid, clabel->status);
1334 #endif
1335 clabel->row = 0;
1336 column = clabel->column;
1337
1338 if ((column < 0) || (column >= raidPtr->numCol)) {
1339 return(EINVAL);
1340 }
1341
1342 /* XXX this isn't allowed to do anything for now :-) */
1343
1344 /* XXX and before it is, we need to fill in the rest
1345 of the fields!?!?!?! */
1346 memcpy(raidget_component_label(raidPtr, column),
1347 clabel, sizeof(*clabel));
1348 raidflush_component_label(raidPtr, column);
1349 return (0);
1350 #endif
1351
1352 case RAIDFRAME_INIT_LABELS:
1353 clabel = (RF_ComponentLabel_t *) data;
1354 /*
1355 we only want the serial number from
1356 the above. We get all the rest of the information
1357 from the config that was used to create this RAID
1358 set.
1359 */
1360
1361 raidPtr->serial_number = clabel->serial_number;
1362
1363 for(column=0;column<raidPtr->numCol;column++) {
1364 diskPtr = &raidPtr->Disks[column];
1365 if (!RF_DEAD_DISK(diskPtr->status)) {
1366 ci_label = raidget_component_label(raidPtr,
1367 column);
1368 /* Zeroing this is important. */
1369 memset(ci_label, 0, sizeof(*ci_label));
1370 raid_init_component_label(raidPtr, ci_label);
1371 ci_label->serial_number =
1372 raidPtr->serial_number;
1373 ci_label->row = 0; /* we dont' pretend to support more */
1374 rf_component_label_set_partitionsize(ci_label,
1375 diskPtr->partitionSize);
1376 ci_label->column = column;
1377 raidflush_component_label(raidPtr, column);
1378 }
1379 /* XXXjld what about the spares? */
1380 }
1381
1382 return (retcode);
1383 case RAIDFRAME_SET_AUTOCONFIG:
1384 d = rf_set_autoconfig(raidPtr, *(int *) data);
1385 printf("raid%d: New autoconfig value is: %d\n",
1386 raidPtr->raidid, d);
1387 *(int *) data = d;
1388 return (retcode);
1389
1390 case RAIDFRAME_SET_ROOT:
1391 d = rf_set_rootpartition(raidPtr, *(int *) data);
1392 printf("raid%d: New rootpartition value is: %d\n",
1393 raidPtr->raidid, d);
1394 *(int *) data = d;
1395 return (retcode);
1396
1397 /* initialize all parity */
1398 case RAIDFRAME_REWRITEPARITY:
1399
1400 if (raidPtr->Layout.map->faultsTolerated == 0) {
1401 /* Parity for RAID 0 is trivially correct */
1402 raidPtr->parity_good = RF_RAID_CLEAN;
1403 return(0);
1404 }
1405
1406 if (raidPtr->parity_rewrite_in_progress == 1) {
1407 /* Re-write is already in progress! */
1408 return(EINVAL);
1409 }
1410
1411 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1412 rf_RewriteParityThread,
1413 raidPtr,"raid_parity");
1414 return (retcode);
1415
1416
1417 case RAIDFRAME_ADD_HOT_SPARE:
1418 sparePtr = (RF_SingleComponent_t *) data;
1419 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1420 retcode = rf_add_hot_spare(raidPtr, &component);
1421 return(retcode);
1422
1423 case RAIDFRAME_REMOVE_HOT_SPARE:
1424 return(retcode);
1425
1426 case RAIDFRAME_DELETE_COMPONENT:
1427 componentPtr = (RF_SingleComponent_t *)data;
1428 memcpy( &component, componentPtr,
1429 sizeof(RF_SingleComponent_t));
1430 retcode = rf_delete_component(raidPtr, &component);
1431 return(retcode);
1432
1433 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1434 componentPtr = (RF_SingleComponent_t *)data;
1435 memcpy( &component, componentPtr,
1436 sizeof(RF_SingleComponent_t));
1437 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1438 return(retcode);
1439
1440 case RAIDFRAME_REBUILD_IN_PLACE:
1441
1442 if (raidPtr->Layout.map->faultsTolerated == 0) {
1443 /* Can't do this on a RAID 0!! */
1444 return(EINVAL);
1445 }
1446
1447 if (raidPtr->recon_in_progress == 1) {
1448 /* a reconstruct is already in progress! */
1449 return(EINVAL);
1450 }
1451
1452 componentPtr = (RF_SingleComponent_t *) data;
1453 memcpy( &component, componentPtr,
1454 sizeof(RF_SingleComponent_t));
1455 component.row = 0; /* we don't support any more */
1456 column = component.column;
1457
1458 if ((column < 0) || (column >= raidPtr->numCol)) {
1459 return(EINVAL);
1460 }
1461
1462 rf_lock_mutex2(raidPtr->mutex);
1463 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1464 (raidPtr->numFailures > 0)) {
1465 /* XXX 0 above shouldn't be constant!!! */
1466 /* some component other than this has failed.
1467 Let's not make things worse than they already
1468 are... */
1469 printf("raid%d: Unable to reconstruct to disk at:\n",
1470 raidPtr->raidid);
1471 printf("raid%d: Col: %d Too many failures.\n",
1472 raidPtr->raidid, column);
1473 rf_unlock_mutex2(raidPtr->mutex);
1474 return (EINVAL);
1475 }
1476 if (raidPtr->Disks[column].status ==
1477 rf_ds_reconstructing) {
1478 printf("raid%d: Unable to reconstruct to disk at:\n",
1479 raidPtr->raidid);
1480 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1481
1482 rf_unlock_mutex2(raidPtr->mutex);
1483 return (EINVAL);
1484 }
1485 if (raidPtr->Disks[column].status == rf_ds_spared) {
1486 rf_unlock_mutex2(raidPtr->mutex);
1487 return (EINVAL);
1488 }
1489 rf_unlock_mutex2(raidPtr->mutex);
1490
1491 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1492 if (rrcopy == NULL)
1493 return(ENOMEM);
1494
1495 rrcopy->raidPtr = (void *) raidPtr;
1496 rrcopy->col = column;
1497
1498 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1499 rf_ReconstructInPlaceThread,
1500 rrcopy,"raid_reconip");
1501 return(retcode);
1502
1503 case RAIDFRAME_GET_INFO:
1504 if (!raidPtr->valid)
1505 return (ENODEV);
1506 ucfgp = (RF_DeviceConfig_t **) data;
1507 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1508 (RF_DeviceConfig_t *));
1509 if (d_cfg == NULL)
1510 return (ENOMEM);
1511 d_cfg->rows = 1; /* there is only 1 row now */
1512 d_cfg->cols = raidPtr->numCol;
1513 d_cfg->ndevs = raidPtr->numCol;
1514 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1515 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1516 return (ENOMEM);
1517 }
1518 d_cfg->nspares = raidPtr->numSpare;
1519 if (d_cfg->nspares >= RF_MAX_DISKS) {
1520 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1521 return (ENOMEM);
1522 }
1523 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1524 d = 0;
1525 for (j = 0; j < d_cfg->cols; j++) {
1526 d_cfg->devs[d] = raidPtr->Disks[j];
1527 d++;
1528 }
1529 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1530 d_cfg->spares[i] = raidPtr->Disks[j];
1531 }
1532 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1533 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1534
1535 return (retcode);
1536
1537 case RAIDFRAME_CHECK_PARITY:
1538 *(int *) data = raidPtr->parity_good;
1539 return (0);
1540
1541 case RAIDFRAME_PARITYMAP_STATUS:
1542 if (rf_paritymap_ineligible(raidPtr))
1543 return EINVAL;
1544 rf_paritymap_status(raidPtr->parity_map,
1545 (struct rf_pmstat *)data);
1546 return 0;
1547
1548 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1549 if (rf_paritymap_ineligible(raidPtr))
1550 return EINVAL;
1551 if (raidPtr->parity_map == NULL)
1552 return ENOENT; /* ??? */
1553 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1554 (struct rf_pmparams *)data, 1))
1555 return EINVAL;
1556 return 0;
1557
1558 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1559 if (rf_paritymap_ineligible(raidPtr))
1560 return EINVAL;
1561 *(int *) data = rf_paritymap_get_disable(raidPtr);
1562 return 0;
1563
1564 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1565 if (rf_paritymap_ineligible(raidPtr))
1566 return EINVAL;
1567 rf_paritymap_set_disable(raidPtr, *(int *)data);
1568 /* XXX should errors be passed up? */
1569 return 0;
1570
1571 case RAIDFRAME_RESET_ACCTOTALS:
1572 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1573 return (0);
1574
1575 case RAIDFRAME_GET_ACCTOTALS:
1576 totals = (RF_AccTotals_t *) data;
1577 *totals = raidPtr->acc_totals;
1578 return (0);
1579
1580 case RAIDFRAME_KEEP_ACCTOTALS:
1581 raidPtr->keep_acc_totals = *(int *)data;
1582 return (0);
1583
1584 case RAIDFRAME_GET_SIZE:
1585 *(int *) data = raidPtr->totalSectors;
1586 return (0);
1587
1588 /* fail a disk & optionally start reconstruction */
1589 case RAIDFRAME_FAIL_DISK:
1590
1591 if (raidPtr->Layout.map->faultsTolerated == 0) {
1592 /* Can't do this on a RAID 0!! */
1593 return(EINVAL);
1594 }
1595
1596 rr = (struct rf_recon_req *) data;
1597 rr->row = 0;
1598 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1599 return (EINVAL);
1600
1601
1602 rf_lock_mutex2(raidPtr->mutex);
1603 if (raidPtr->status == rf_rs_reconstructing) {
1604 /* you can't fail a disk while we're reconstructing! */
1605 /* XXX wrong for RAID6 */
1606 rf_unlock_mutex2(raidPtr->mutex);
1607 return (EINVAL);
1608 }
1609 if ((raidPtr->Disks[rr->col].status ==
1610 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1611 /* some other component has failed. Let's not make
1612 things worse. XXX wrong for RAID6 */
1613 rf_unlock_mutex2(raidPtr->mutex);
1614 return (EINVAL);
1615 }
1616 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1617 /* Can't fail a spared disk! */
1618 rf_unlock_mutex2(raidPtr->mutex);
1619 return (EINVAL);
1620 }
1621 rf_unlock_mutex2(raidPtr->mutex);
1622
1623 /* make a copy of the recon request so that we don't rely on
1624 * the user's buffer */
1625 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1626 if (rrcopy == NULL)
1627 return(ENOMEM);
1628 memcpy(rrcopy, rr, sizeof(*rr));
1629 rrcopy->raidPtr = (void *) raidPtr;
1630
1631 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1632 rf_ReconThread,
1633 rrcopy,"raid_recon");
1634 return (0);
1635
1636 /* invoke a copyback operation after recon on whatever disk
1637 * needs it, if any */
1638 case RAIDFRAME_COPYBACK:
1639
1640 if (raidPtr->Layout.map->faultsTolerated == 0) {
1641 /* This makes no sense on a RAID 0!! */
1642 return(EINVAL);
1643 }
1644
1645 if (raidPtr->copyback_in_progress == 1) {
1646 /* Copyback is already in progress! */
1647 return(EINVAL);
1648 }
1649
1650 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1651 rf_CopybackThread,
1652 raidPtr,"raid_copyback");
1653 return (retcode);
1654
1655 /* return the percentage completion of reconstruction */
1656 case RAIDFRAME_CHECK_RECON_STATUS:
1657 if (raidPtr->Layout.map->faultsTolerated == 0) {
1658 /* This makes no sense on a RAID 0, so tell the
1659 user it's done. */
1660 *(int *) data = 100;
1661 return(0);
1662 }
1663 if (raidPtr->status != rf_rs_reconstructing)
1664 *(int *) data = 100;
1665 else {
1666 if (raidPtr->reconControl->numRUsTotal > 0) {
1667 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1668 } else {
1669 *(int *) data = 0;
1670 }
1671 }
1672 return (0);
1673 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1674 progressInfoPtr = (RF_ProgressInfo_t **) data;
1675 if (raidPtr->status != rf_rs_reconstructing) {
1676 progressInfo.remaining = 0;
1677 progressInfo.completed = 100;
1678 progressInfo.total = 100;
1679 } else {
1680 progressInfo.total =
1681 raidPtr->reconControl->numRUsTotal;
1682 progressInfo.completed =
1683 raidPtr->reconControl->numRUsComplete;
1684 progressInfo.remaining = progressInfo.total -
1685 progressInfo.completed;
1686 }
1687 retcode = copyout(&progressInfo, *progressInfoPtr,
1688 sizeof(RF_ProgressInfo_t));
1689 return (retcode);
1690
1691 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1692 if (raidPtr->Layout.map->faultsTolerated == 0) {
1693 /* This makes no sense on a RAID 0, so tell the
1694 user it's done. */
1695 *(int *) data = 100;
1696 return(0);
1697 }
1698 if (raidPtr->parity_rewrite_in_progress == 1) {
1699 *(int *) data = 100 *
1700 raidPtr->parity_rewrite_stripes_done /
1701 raidPtr->Layout.numStripe;
1702 } else {
1703 *(int *) data = 100;
1704 }
1705 return (0);
1706
1707 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1708 progressInfoPtr = (RF_ProgressInfo_t **) data;
1709 if (raidPtr->parity_rewrite_in_progress == 1) {
1710 progressInfo.total = raidPtr->Layout.numStripe;
1711 progressInfo.completed =
1712 raidPtr->parity_rewrite_stripes_done;
1713 progressInfo.remaining = progressInfo.total -
1714 progressInfo.completed;
1715 } else {
1716 progressInfo.remaining = 0;
1717 progressInfo.completed = 100;
1718 progressInfo.total = 100;
1719 }
1720 retcode = copyout(&progressInfo, *progressInfoPtr,
1721 sizeof(RF_ProgressInfo_t));
1722 return (retcode);
1723
1724 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1725 if (raidPtr->Layout.map->faultsTolerated == 0) {
1726 /* This makes no sense on a RAID 0 */
1727 *(int *) data = 100;
1728 return(0);
1729 }
1730 if (raidPtr->copyback_in_progress == 1) {
1731 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1732 raidPtr->Layout.numStripe;
1733 } else {
1734 *(int *) data = 100;
1735 }
1736 return (0);
1737
1738 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1739 progressInfoPtr = (RF_ProgressInfo_t **) data;
1740 if (raidPtr->copyback_in_progress == 1) {
1741 progressInfo.total = raidPtr->Layout.numStripe;
1742 progressInfo.completed =
1743 raidPtr->copyback_stripes_done;
1744 progressInfo.remaining = progressInfo.total -
1745 progressInfo.completed;
1746 } else {
1747 progressInfo.remaining = 0;
1748 progressInfo.completed = 100;
1749 progressInfo.total = 100;
1750 }
1751 retcode = copyout(&progressInfo, *progressInfoPtr,
1752 sizeof(RF_ProgressInfo_t));
1753 return (retcode);
1754
1755 /* the sparetable daemon calls this to wait for the kernel to
1756 * need a spare table. this ioctl does not return until a
1757 * spare table is needed. XXX -- calling mpsleep here in the
1758 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1759 * -- I should either compute the spare table in the kernel,
1760 * or have a different -- XXX XXX -- interface (a different
1761 * character device) for delivering the table -- XXX */
1762 #if 0
1763 case RAIDFRAME_SPARET_WAIT:
1764 rf_lock_mutex2(rf_sparet_wait_mutex);
1765 while (!rf_sparet_wait_queue)
1766 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1767 waitreq = rf_sparet_wait_queue;
1768 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1769 rf_unlock_mutex2(rf_sparet_wait_mutex);
1770
1771 /* structure assignment */
1772 *((RF_SparetWait_t *) data) = *waitreq;
1773
1774 RF_Free(waitreq, sizeof(*waitreq));
1775 return (0);
1776
1777 /* wakes up a process waiting on SPARET_WAIT and puts an error
1778 * code in it that will cause the dameon to exit */
1779 case RAIDFRAME_ABORT_SPARET_WAIT:
1780 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1781 waitreq->fcol = -1;
1782 rf_lock_mutex2(rf_sparet_wait_mutex);
1783 waitreq->next = rf_sparet_wait_queue;
1784 rf_sparet_wait_queue = waitreq;
1785 rf_broadcast_conf2(rf_sparet_wait_cv);
1786 rf_unlock_mutex2(rf_sparet_wait_mutex);
1787 return (0);
1788
1789 /* used by the spare table daemon to deliver a spare table
1790 * into the kernel */
1791 case RAIDFRAME_SEND_SPARET:
1792
1793 /* install the spare table */
1794 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1795
1796 /* respond to the requestor. the return status of the spare
1797 * table installation is passed in the "fcol" field */
1798 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1799 waitreq->fcol = retcode;
1800 rf_lock_mutex2(rf_sparet_wait_mutex);
1801 waitreq->next = rf_sparet_resp_queue;
1802 rf_sparet_resp_queue = waitreq;
1803 rf_broadcast_cond2(rf_sparet_resp_cv);
1804 rf_unlock_mutex2(rf_sparet_wait_mutex);
1805
1806 return (retcode);
1807 #endif
1808
1809 default:
1810 break; /* fall through to the os-specific code below */
1811
1812 }
1813
1814 if (!raidPtr->valid)
1815 return (EINVAL);
1816
1817 /*
1818 * Add support for "regular" device ioctls here.
1819 */
1820
1821 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1822 if (error != EPASSTHROUGH)
1823 return (error);
1824
1825 switch (cmd) {
1826 case DIOCGDINFO:
1827 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1828 break;
1829 #ifdef __HAVE_OLD_DISKLABEL
1830 case ODIOCGDINFO:
1831 newlabel = *(rs->sc_dkdev.dk_label);
1832 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1833 return ENOTTY;
1834 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1835 break;
1836 #endif
1837
1838 case DIOCGPART:
1839 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1840 ((struct partinfo *) data)->part =
1841 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1842 break;
1843
1844 case DIOCWDINFO:
1845 case DIOCSDINFO:
1846 #ifdef __HAVE_OLD_DISKLABEL
1847 case ODIOCWDINFO:
1848 case ODIOCSDINFO:
1849 #endif
1850 {
1851 struct disklabel *lp;
1852 #ifdef __HAVE_OLD_DISKLABEL
1853 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1854 memset(&newlabel, 0, sizeof newlabel);
1855 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1856 lp = &newlabel;
1857 } else
1858 #endif
1859 lp = (struct disklabel *)data;
1860
1861 if ((error = raidlock(rs)) != 0)
1862 return (error);
1863
1864 rs->sc_flags |= RAIDF_LABELLING;
1865
1866 error = setdisklabel(rs->sc_dkdev.dk_label,
1867 lp, 0, rs->sc_dkdev.dk_cpulabel);
1868 if (error == 0) {
1869 if (cmd == DIOCWDINFO
1870 #ifdef __HAVE_OLD_DISKLABEL
1871 || cmd == ODIOCWDINFO
1872 #endif
1873 )
1874 error = writedisklabel(RAIDLABELDEV(dev),
1875 raidstrategy, rs->sc_dkdev.dk_label,
1876 rs->sc_dkdev.dk_cpulabel);
1877 }
1878 rs->sc_flags &= ~RAIDF_LABELLING;
1879
1880 raidunlock(rs);
1881
1882 if (error)
1883 return (error);
1884 break;
1885 }
1886
1887 case DIOCWLABEL:
1888 if (*(int *) data != 0)
1889 rs->sc_flags |= RAIDF_WLABEL;
1890 else
1891 rs->sc_flags &= ~RAIDF_WLABEL;
1892 break;
1893
1894 case DIOCGDEFLABEL:
1895 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1896 break;
1897
1898 #ifdef __HAVE_OLD_DISKLABEL
1899 case ODIOCGDEFLABEL:
1900 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1901 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1902 return ENOTTY;
1903 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1904 break;
1905 #endif
1906
1907 case DIOCAWEDGE:
1908 case DIOCDWEDGE:
1909 dkw = (void *)data;
1910
1911 /* If the ioctl happens here, the parent is us. */
1912 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1913 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1914
1915 case DIOCLWEDGES:
1916 return dkwedge_list(&rs->sc_dkdev,
1917 (struct dkwedge_list *)data, l);
1918 case DIOCCACHESYNC:
1919 return rf_sync_component_caches(raidPtr);
1920
1921 case DIOCGSTRATEGY:
1922 {
1923 struct disk_strategy *dks = (void *)data;
1924
1925 s = splbio();
1926 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1927 sizeof(dks->dks_name));
1928 splx(s);
1929 dks->dks_paramlen = 0;
1930
1931 return 0;
1932 }
1933
1934 case DIOCSSTRATEGY:
1935 {
1936 struct disk_strategy *dks = (void *)data;
1937 struct bufq_state *new;
1938 struct bufq_state *old;
1939
1940 if (dks->dks_param != NULL) {
1941 return EINVAL;
1942 }
1943 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1944 error = bufq_alloc(&new, dks->dks_name,
1945 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1946 if (error) {
1947 return error;
1948 }
1949 s = splbio();
1950 old = rs->buf_queue;
1951 bufq_move(new, old);
1952 rs->buf_queue = new;
1953 splx(s);
1954 bufq_free(old);
1955
1956 return 0;
1957 }
1958
1959 default:
1960 retcode = ENOTTY;
1961 }
1962 return (retcode);
1963
1964 }
1965
1966
1967 /* raidinit -- complete the rest of the initialization for the
1968 RAIDframe device. */
1969
1970
1971 static void
1972 raidinit(struct raid_softc *rs)
1973 {
1974 cfdata_t cf;
1975 int unit;
1976 RF_Raid_t *raidPtr = &rs->sc_r;
1977
1978 unit = raidPtr->raidid;
1979
1980
1981 /* XXX should check return code first... */
1982 rs->sc_flags |= RAIDF_INITED;
1983
1984 /* XXX doesn't check bounds. */
1985 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1986
1987 /* attach the pseudo device */
1988 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1989 cf->cf_name = raid_cd.cd_name;
1990 cf->cf_atname = raid_cd.cd_name;
1991 cf->cf_unit = unit;
1992 cf->cf_fstate = FSTATE_STAR;
1993
1994 rs->sc_dev = config_attach_pseudo(cf);
1995
1996 if (rs->sc_dev == NULL) {
1997 printf("raid%d: config_attach_pseudo failed\n",
1998 raidPtr->raidid);
1999 rs->sc_flags &= ~RAIDF_INITED;
2000 free(cf, M_RAIDFRAME);
2001 return;
2002 }
2003
2004 /* disk_attach actually creates space for the CPU disklabel, among
2005 * other things, so it's critical to call this *BEFORE* we try putzing
2006 * with disklabels. */
2007
2008 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2009 disk_attach(&rs->sc_dkdev);
2010 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
2011
2012 /* XXX There may be a weird interaction here between this, and
2013 * protectedSectors, as used in RAIDframe. */
2014
2015 rs->sc_size = raidPtr->totalSectors;
2016
2017 dkwedge_discover(&rs->sc_dkdev);
2018
2019 rf_set_geometry(rs, raidPtr);
2020
2021 }
2022 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
2032 int
2033 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2034 {
2035 int retcode;
2036
2037 rf_lock_mutex2(rf_sparet_wait_mutex);
2038 req->next = rf_sparet_wait_queue;
2039 rf_sparet_wait_queue = req;
2040 rf_broadcast_cond2(rf_sparet_wait_cv);
2041
2042 /* mpsleep unlocks the mutex */
2043 while (!rf_sparet_resp_queue) {
2044 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2045 }
2046 req = rf_sparet_resp_queue;
2047 rf_sparet_resp_queue = req->next;
2048 rf_unlock_mutex2(rf_sparet_wait_mutex);
2049
2050 retcode = req->fcol;
2051 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2052 * alloc'd */
2053 return (retcode);
2054 }
2055 #endif
2056
2057 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2058 * bp & passes it down.
2059 * any calls originating in the kernel must use non-blocking I/O
2060 * do some extra sanity checking to return "appropriate" error values for
2061 * certain conditions (to make some standard utilities work)
2062 *
2063 * Formerly known as: rf_DoAccessKernel
2064 */
/*
 * raidstart: drain the softc's buffer queue into RAIDframe.
 *
 * For each queued buf (while openings remain), convert the
 * partition-relative block number to a RAID address, sanity-check the
 * request against the size of the set, and hand it to rf_DoAccess()
 * as a non-blocking asynchronous I/O.  Completion is reported through
 * the biodone() path by RAIDframe's callback.
 *
 * Locking: raidPtr->mutex is taken and released repeatedly inside the
 * loop -- it is held while examining openings/failure counts, but
 * dropped around bufq_get() and rf_DoAccess().
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* update labels without the mutex held, then reacquire */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* (mutex is held at the top of each iteration) */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb accounts for a trailing partial sector */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the set; the "sum <"
		 * comparisons also catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* requests must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2182
2183
2184
2185
2186 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2187
2188 int
2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2190 {
2191 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2192 struct buf *bp;
2193
2194 req->queue = queue;
2195 bp = req->bp;
2196
2197 switch (req->type) {
2198 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2199 /* XXX need to do something extra here.. */
2200 /* I'm leaving this in, as I've never actually seen it used,
2201 * and I'd like folks to report it... GO */
2202 printf(("WAKEUP CALLED\n"));
2203 queue->numOutstanding++;
2204
2205 bp->b_flags = 0;
2206 bp->b_private = req;
2207
2208 KernelWakeupFunc(bp);
2209 break;
2210
2211 case RF_IO_TYPE_READ:
2212 case RF_IO_TYPE_WRITE:
2213 #if RF_ACC_TRACE > 0
2214 if (req->tracerec) {
2215 RF_ETIMER_START(req->tracerec->timer);
2216 }
2217 #endif
2218 InitBP(bp, queue->rf_cinfo->ci_vp,
2219 op, queue->rf_cinfo->ci_dev,
2220 req->sectorOffset, req->numSector,
2221 req->buf, KernelWakeupFunc, (void *) req,
2222 queue->raidPtr->logBytesPerSector, req->b_proc);
2223
2224 if (rf_debugKernelAccess) {
2225 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2226 (long) bp->b_blkno));
2227 }
2228 queue->numOutstanding++;
2229 queue->last_deq_sector = req->sectorOffset;
2230 /* acc wouldn't have been let in if there were any pending
2231 * reqs at any other priority */
2232 queue->curPriority = req->priority;
2233
2234 db1_printf(("Going for %c to unit %d col %d\n",
2235 req->type, queue->raidPtr->raidid,
2236 queue->col));
2237 db1_printf(("sector %d count %d (%d bytes) %d\n",
2238 (int) req->sectorOffset, (int) req->numSector,
2239 (int) (req->numSector <<
2240 queue->raidPtr->logBytesPerSector),
2241 (int) queue->raidPtr->logBytesPerSector));
2242
2243 /*
2244 * XXX: drop lock here since this can block at
2245 * least with backing SCSI devices. Retake it
2246 * to minimize fuss with calling interfaces.
2247 */
2248
2249 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2250 bdev_strategy(bp);
2251 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2252 break;
2253
2254 default:
2255 panic("bad req->type in rf_DispatchKernelIO");
2256 }
2257 db1_printf(("Exiting from DispatchKernelIO\n"));
2258
2259 return (0);
2260 }
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs as bp->b_iodone when a component I/O issued by
   rf_DispatchKernelIO() completes.  On I/O error it may mark the
   component failed (once, and only while the set can still tolerate
   the failure), then places the request on raidPtr->iodone and
   signals the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2329
2330
2331 /*
2332 * initialize a buf structure for doing an I/O in the kernel.
2333 */
2334 static void
2335 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2336 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2337 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2338 struct proc *b_proc)
2339 {
2340 /* bp->b_flags = B_PHYS | rw_flag; */
2341 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2342 bp->b_oflags = 0;
2343 bp->b_cflags = 0;
2344 bp->b_bcount = numSect << logBytesPerSector;
2345 bp->b_bufsize = bp->b_bcount;
2346 bp->b_error = 0;
2347 bp->b_dev = dev;
2348 bp->b_data = bf;
2349 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2350 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2351 if (bp->b_bcount == 0) {
2352 panic("bp->b_bcount is zero in InitBP!!");
2353 }
2354 bp->b_proc = b_proc;
2355 bp->b_iodone = cbFunc;
2356 bp->b_private = cbArg;
2357 }
2358
2359 static void
2360 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2361 struct disklabel *lp)
2362 {
2363 memset(lp, 0, sizeof(*lp));
2364
2365 /* fabricate a label... */
2366 lp->d_secperunit = raidPtr->totalSectors;
2367 lp->d_secsize = raidPtr->bytesPerSector;
2368 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2369 lp->d_ntracks = 4 * raidPtr->numCol;
2370 lp->d_ncylinders = raidPtr->totalSectors /
2371 (lp->d_nsectors * lp->d_ntracks);
2372 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2373
2374 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2375 lp->d_type = DTYPE_RAID;
2376 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2377 lp->d_rpm = 3600;
2378 lp->d_interleave = 1;
2379 lp->d_flags = 0;
2380
2381 lp->d_partitions[RAW_PART].p_offset = 0;
2382 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2383 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2384 lp->d_npartitions = RAW_PART + 1;
2385
2386 lp->d_magic = DISKMAGIC;
2387 lp->d_magic2 = DISKMAGIC;
2388 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2389
2390 }
2391 /*
2392 * Read the disklabel from the raid device. If one is not present, fake one
2393 * up.
2394 */
2395 static void
2396 raidgetdisklabel(dev_t dev)
2397 {
2398 int unit = raidunit(dev);
2399 struct raid_softc *rs;
2400 const char *errstring;
2401 struct disklabel *lp;
2402 struct cpu_disklabel *clp;
2403 RF_Raid_t *raidPtr;
2404
2405 if ((rs = raidget(unit)) == NULL)
2406 return;
2407
2408 lp = rs->sc_dkdev.dk_label;
2409 clp = rs->sc_dkdev.dk_cpulabel;
2410
2411 db1_printf(("Getting the disklabel...\n"));
2412
2413 memset(clp, 0, sizeof(*clp));
2414
2415 raidPtr = &rs->sc_r;
2416
2417 raidgetdefaultlabel(raidPtr, rs, lp);
2418
2419 /*
2420 * Call the generic disklabel extraction routine.
2421 */
2422 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2423 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2424 if (errstring)
2425 raidmakedisklabel(rs);
2426 else {
2427 int i;
2428 struct partition *pp;
2429
2430 /*
2431 * Sanity check whether the found disklabel is valid.
2432 *
2433 * This is necessary since total size of the raid device
2434 * may vary when an interleave is changed even though exactly
2435 * same components are used, and old disklabel may used
2436 * if that is found.
2437 */
2438 if (lp->d_secperunit != rs->sc_size)
2439 printf("raid%d: WARNING: %s: "
2440 "total sector size in disklabel (%" PRIu32 ") != "
2441 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2442 lp->d_secperunit, rs->sc_size);
2443 for (i = 0; i < lp->d_npartitions; i++) {
2444 pp = &lp->d_partitions[i];
2445 if (pp->p_offset + pp->p_size > rs->sc_size)
2446 printf("raid%d: WARNING: %s: end of partition `%c' "
2447 "exceeds the size of raid (%" PRIu64 ")\n",
2448 unit, rs->sc_xname, 'a' + i, rs->sc_size);
2449 }
2450 }
2451
2452 }
2453 /*
2454 * Take care of things one might want to take care of in the event
2455 * that a disklabel isn't present.
2456 */
2457 static void
2458 raidmakedisklabel(struct raid_softc *rs)
2459 {
2460 struct disklabel *lp = rs->sc_dkdev.dk_label;
2461 db1_printf(("Making a label..\n"));
2462
2463 /*
2464 * For historical reasons, if there's no disklabel present
2465 * the raw partition must be marked FS_BSDFFS.
2466 */
2467
2468 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2469
2470 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2471
2472 lp->d_checksum = dkcksum(lp);
2473 }
2474 /*
2475 * Wait interruptibly for an exclusive lock.
2476 *
2477 * XXX
2478 * Several drivers do this; it should be abstracted and made MP-safe.
2479 * (Hmm... where have we seen this warning before :-> GO )
2480 */
2481 static int
2482 raidlock(struct raid_softc *rs)
2483 {
2484 int error;
2485
2486 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2487 rs->sc_flags |= RAIDF_WANTED;
2488 if ((error =
2489 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2490 return (error);
2491 }
2492 rs->sc_flags |= RAIDF_LOCKED;
2493 return (0);
2494 }
2495 /*
2496 * Unlock and wake up any waiters.
2497 */
2498 static void
2499 raidunlock(struct raid_softc *rs)
2500 {
2501
2502 rs->sc_flags &= ~RAIDF_LOCKED;
2503 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2504 rs->sc_flags &= ~RAIDF_WANTED;
2505 wakeup(rs);
2506 }
2507 }
2508
2509
2510 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2511 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2512 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2513
/* Byte offset of the component label area on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2520
2521 static daddr_t
2522 rf_component_info_size(unsigned secsize)
2523 {
2524 daddr_t info_size;
2525
2526 KASSERT(secsize);
2527 if (secsize > RF_COMPONENT_INFO_SIZE)
2528 info_size = secsize;
2529 else
2530 info_size = RF_COMPONENT_INFO_SIZE;
2531
2532 return info_size;
2533 }
2534
2535 static daddr_t
2536 rf_parity_map_offset(RF_Raid_t *raidPtr)
2537 {
2538 daddr_t map_offset;
2539
2540 KASSERT(raidPtr->bytesPerSector);
2541 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2542 map_offset = raidPtr->bytesPerSector;
2543 else
2544 map_offset = RF_COMPONENT_INFO_SIZE;
2545 map_offset += rf_component_info_offset();
2546
2547 return map_offset;
2548 }
2549
2550 static daddr_t
2551 rf_parity_map_size(RF_Raid_t *raidPtr)
2552 {
2553 daddr_t map_size;
2554
2555 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2556 map_size = raidPtr->bytesPerSector;
2557 else
2558 map_size = RF_PARITY_MAP_SIZE;
2559
2560 return map_size;
2561 }
2562
/* Mark the in-core component label for column `col' clean and flush
 * it out to the component.  Always returns 0. */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2573
2574
/* Mark the in-core component label for column `col' dirty and flush
 * it out to the component.  Always returns 0. */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2585
/* Read the on-disk component label for column `col' into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns the read error, if any. */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2595
/* Return a pointer to the in-core component label for column `col'. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2601
/* Write the in-core component label for column `col' back to disk,
 * stamping it with the current mod_counter first.  Returns the write
 * error, if any. */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's idea of the mod counter in sync */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2616
2617
/* Read the component label from (dev, b_vp) into *clabel. */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2627
2628 /* ARGSUSED */
2629 static int
2630 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2631 size_t msize, daddr_t offset, daddr_t dsize)
2632 {
2633 struct buf *bp;
2634 const struct bdevsw *bdev;
2635 int error;
2636
2637 /* XXX should probably ensure that we don't try to do this if
2638 someone has changed rf_protected_sectors. */
2639
2640 if (b_vp == NULL) {
2641 /* For whatever reason, this component is not valid.
2642 Don't try to read a component label from it. */
2643 return(EINVAL);
2644 }
2645
2646 /* get a block of the appropriate size... */
2647 bp = geteblk((int)dsize);
2648 bp->b_dev = dev;
2649
2650 /* get our ducks in a row for the read */
2651 bp->b_blkno = offset / DEV_BSIZE;
2652 bp->b_bcount = dsize;
2653 bp->b_flags |= B_READ;
2654 bp->b_resid = dsize;
2655
2656 bdev = bdevsw_lookup(bp->b_dev);
2657 if (bdev == NULL)
2658 return (ENXIO);
2659 (*bdev->d_strategy)(bp);
2660
2661 error = biowait(bp);
2662
2663 if (!error) {
2664 memcpy(data, bp->b_data, msize);
2665 }
2666
2667 brelse(bp, 0);
2668 return(error);
2669 }
2670
2671
/* Write *clabel to the component label area on (dev, b_vp),
 * synchronously (asyncp = 0). */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2681
2682 /* ARGSUSED */
2683 static int
2684 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2685 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2686 {
2687 struct buf *bp;
2688 const struct bdevsw *bdev;
2689 int error;
2690
2691 /* get a block of the appropriate size... */
2692 bp = geteblk((int)dsize);
2693 bp->b_dev = dev;
2694
2695 /* get our ducks in a row for the write */
2696 bp->b_blkno = offset / DEV_BSIZE;
2697 bp->b_bcount = dsize;
2698 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2699 bp->b_resid = dsize;
2700
2701 memset(bp->b_data, 0, dsize);
2702 memcpy(bp->b_data, data, msize);
2703
2704 bdev = bdevsw_lookup(bp->b_dev);
2705 if (bdev == NULL)
2706 return (ENXIO);
2707 (*bdev->d_strategy)(bp);
2708 if (asyncp)
2709 return 0;
2710 error = biowait(bp);
2711 brelse(bp, 0);
2712 if (error) {
2713 #if 1
2714 printf("Failed to write RAID component info!\n");
2715 #endif
2716 }
2717
2718 return(error);
2719 }
2720
/* Write the on-disk parity map *map to every live component. */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2738
2739 void
2740 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2741 {
2742 struct rf_paritymap_ondisk tmp;
2743 int c,first;
2744
2745 first=1;
2746 for (c = 0; c < raidPtr->numCol; c++) {
2747 /* Skip dead disks. */
2748 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2749 continue;
2750 raidread_component_area(raidPtr->Disks[c].dev,
2751 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2752 RF_PARITYMAP_NBYTE,
2753 rf_parity_map_offset(raidPtr),
2754 rf_parity_map_size(raidPtr));
2755 if (first) {
2756 memcpy(map, &tmp, sizeof(*map));
2757 first = 0;
2758 } else {
2759 rf_paritymap_merge(map, &tmp);
2760 }
2761 }
2762 }
2763
/*
 * Bump the mod counter and mark the component labels of all live,
 * non-spared components (and all in-use spares) dirty, so that an
 * unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for.
			   NOTE(review): if no column matches, scol stays -1
			   and is stored in clabel->column below -- confirm
			   a used spare always has a matching spareCol. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2823
2824
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the mod counter, record current status and unit, and
 * write the labels out.  When `final' is RF_FINAL_COMPONENT_UPDATE
 * and parity is known good, also mark the labels clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2899
2900 void
2901 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2902 {
2903
2904 if (vp != NULL) {
2905 if (auto_configured == 1) {
2906 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2907 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2908 vput(vp);
2909
2910 } else {
2911 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2912 }
2913 }
2914 }
2915
2916
2917 void
2918 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2919 {
2920 int r,c;
2921 struct vnode *vp;
2922 int acd;
2923
2924
2925 /* We take this opportunity to close the vnodes like we should.. */
2926
2927 for (c = 0; c < raidPtr->numCol; c++) {
2928 vp = raidPtr->raid_cinfo[c].ci_vp;
2929 acd = raidPtr->Disks[c].auto_configured;
2930 rf_close_component(raidPtr, vp, acd);
2931 raidPtr->raid_cinfo[c].ci_vp = NULL;
2932 raidPtr->Disks[c].auto_configured = 0;
2933 }
2934
2935 for (r = 0; r < raidPtr->numSpare; r++) {
2936 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2937 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2938 rf_close_component(raidPtr, vp, acd);
2939 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2940 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2941 }
2942 }
2943
2944
/*
 * Kernel thread body: fail the component named in *req and, when
 * RF_FDFLAGS_RECON is set, reconstruct it to a spare.  Frees *req
 * before exiting.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* second arg selects "fail and reconstruct" vs. just fail */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2966
/*
 * Kernel thread body: rewrite all parity on the set.  On success,
 * parity_good is set so the component labels can be marked clean
 * at shutdown.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2997
2998
/*
 * Kernel thread body: copy reconstructed data back from the spare to
 * the replaced component.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3013
3014
/*
 * Kernel thread body: reconstruct the component named in *req in
 * place.  Frees *req before exiting.
 * NOTE(review): the return value of rf_ReconstructInPlace() is
 * ignored here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3032
/*
 * Try to read a component label from (dev, vp).  If a label is read
 * and looks reasonable, prepend a new RF_AutoConfig_t to ac_list and
 * return the new list head (the vnode is kept open and owned by the
 * list entry).  Otherwise close/release the vnode and return ac_list
 * unchanged.  On allocation failure the whole list is freed and NULL
 * is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label rejected or unreadable; release the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3090
/*
 * Scan every disk-class device in the system for RAIDframe
 * components and return an RF_AutoConfig_t list of all components
 * whose labels look reasonable.  Wedges (dk), FS_RAID disklabel
 * partitions, and (as a fallback) raw partitions are considered.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges are checked via their wedge info, not a
			   disklabel */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				/* not a RAIDframe wedge; move on */
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3272
3273
3274 int
3275 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3276 {
3277
3278 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3279 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3280 ((clabel->clean == RF_RAID_CLEAN) ||
3281 (clabel->clean == RF_RAID_DIRTY)) &&
3282 clabel->row >=0 &&
3283 clabel->column >= 0 &&
3284 clabel->num_rows > 0 &&
3285 clabel->num_columns > 0 &&
3286 clabel->row < clabel->num_rows &&
3287 clabel->column < clabel->num_columns &&
3288 clabel->blockSize > 0 &&
3289 /*
3290 * numBlocksHi may contain garbage, but it is ok since
3291 * the type is unsigned. If it is really garbage,
3292 * rf_fix_old_label_size() will fix it.
3293 */
3294 rf_component_label_numblocks(clabel) > 0) {
3295 /*
3296 * label looks reasonable enough...
3297 * let's make sure it has no old garbage.
3298 */
3299 if (numsecs)
3300 rf_fix_old_label_size(clabel, numsecs);
3301 return(1);
3302 }
3303 return(0);
3304 }
3305
3306
3307 /*
3308 * For reasons yet unknown, some old component labels have garbage in
3309 * the newer numBlocksHi region, and this causes lossage. Since those
3310 * disks will also have numsecs set to less than 32 bits of sectors,
3311 * we can determine when this corruption has occurred, and fix it.
3312 *
3313 * The exact same problem, with the same unknown reason, happens to
3314 * the partitionSizeHi member as well.
3315 */
3316 static void
3317 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3318 {
3319
3320 if (numsecs < ((uint64_t)1 << 32)) {
3321 if (clabel->numBlocksHi) {
3322 printf("WARNING: total sectors < 32 bits, yet "
3323 "numBlocksHi set\n"
3324 "WARNING: resetting numBlocksHi to zero.\n");
3325 clabel->numBlocksHi = 0;
3326 }
3327
3328 if (clabel->partitionSizeHi) {
3329 printf("WARNING: total sectors < 32 bits, yet "
3330 "partitionSizeHi set\n"
3331 "WARNING: resetting partitionSizeHi to zero.\n");
3332 clabel->partitionSizeHi = 0;
3333 }
3334 }
3335 }
3336
3337
#ifdef DEBUG
/*
 * Dump a component label to the console in human-readable form.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* names for root_partition values; index is masked to 0..3 below */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
3371
3372 RF_ConfigSet_t *
3373 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3374 {
3375 RF_AutoConfig_t *ac;
3376 RF_ConfigSet_t *config_sets;
3377 RF_ConfigSet_t *cset;
3378 RF_AutoConfig_t *ac_next;
3379
3380
3381 config_sets = NULL;
3382
3383 /* Go through the AutoConfig list, and figure out which components
3384 belong to what sets. */
3385 ac = ac_list;
3386 while(ac!=NULL) {
3387 /* we're going to putz with ac->next, so save it here
3388 for use at the end of the loop */
3389 ac_next = ac->next;
3390
3391 if (config_sets == NULL) {
3392 /* will need at least this one... */
3393 config_sets = (RF_ConfigSet_t *)
3394 malloc(sizeof(RF_ConfigSet_t),
3395 M_RAIDFRAME, M_NOWAIT);
3396 if (config_sets == NULL) {
3397 panic("rf_create_auto_sets: No memory!");
3398 }
3399 /* this one is easy :) */
3400 config_sets->ac = ac;
3401 config_sets->next = NULL;
3402 config_sets->rootable = 0;
3403 ac->next = NULL;
3404 } else {
3405 /* which set does this component fit into? */
3406 cset = config_sets;
3407 while(cset!=NULL) {
3408 if (rf_does_it_fit(cset, ac)) {
3409 /* looks like it matches... */
3410 ac->next = cset->ac;
3411 cset->ac = ac;
3412 break;
3413 }
3414 cset = cset->next;
3415 }
3416 if (cset==NULL) {
3417 /* didn't find a match above... new set..*/
3418 cset = (RF_ConfigSet_t *)
3419 malloc(sizeof(RF_ConfigSet_t),
3420 M_RAIDFRAME, M_NOWAIT);
3421 if (cset == NULL) {
3422 panic("rf_create_auto_sets: No memory!");
3423 }
3424 cset->ac = ac;
3425 ac->next = NULL;
3426 cset->next = config_sets;
3427 cset->rootable = 0;
3428 config_sets = cset;
3429 }
3430 }
3431 ac = ac_next;
3432 }
3433
3434
3435 return(config_sets);
3436 }
3437
3438 static int
3439 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3440 {
3441 RF_ComponentLabel_t *clabel1, *clabel2;
3442
3443 /* If this one matches the *first* one in the set, that's good
3444 enough, since the other members of the set would have been
3445 through here too... */
3446 /* note that we are not checking partitionSize here..
3447
3448 Note that we are also not checking the mod_counters here.
3449 If everything else matches except the mod_counter, that's
3450 good enough for this test. We will deal with the mod_counters
3451 a little later in the autoconfiguration process.
3452
3453 (clabel1->mod_counter == clabel2->mod_counter) &&
3454
3455 The reason we don't check for this is that failed disks
3456 will have lower modification counts. If those disks are
3457 not added to the set they used to belong to, then they will
3458 form their own set, which may result in 2 different sets,
3459 for example, competing to be configured at raid0, and
3460 perhaps competing to be the root filesystem set. If the
3461 wrong ones get configured, or both attempt to become /,
3462 weird behaviour and or serious lossage will occur. Thus we
3463 need to bring them into the fold here, and kick them out at
3464 a later point.
3465
3466 */
3467
3468 clabel1 = cset->ac->clabel;
3469 clabel2 = ac->clabel;
3470 if ((clabel1->version == clabel2->version) &&
3471 (clabel1->serial_number == clabel2->serial_number) &&
3472 (clabel1->num_rows == clabel2->num_rows) &&
3473 (clabel1->num_columns == clabel2->num_columns) &&
3474 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3475 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3476 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3477 (clabel1->parityConfig == clabel2->parityConfig) &&
3478 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3479 (clabel1->blockSize == clabel2->blockSize) &&
3480 rf_component_label_numblocks(clabel1) ==
3481 rf_component_label_numblocks(clabel2) &&
3482 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3483 (clabel1->root_partition == clabel2->root_partition) &&
3484 (clabel1->last_unit == clabel2->last_unit) &&
3485 (clabel1->config_order == clabel2->config_order)) {
3486 /* if it get's here, it almost *has* to be a match */
3487 } else {
3488 /* it's not consistent with somebody in the set..
3489 punt */
3490 return(0);
3491 }
3492 /* all was fine.. it must fit... */
3493 return(1);
3494 }
3495
/*
 * Check whether a configuration set still has enough live components
 * (i.e. components carrying the highest mod_counter seen in the set)
 * to be configured.  Returns 1 if the set is usable, 0 if too many
 * components are missing for its parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The winner is the highest mod_counter present; components with
	   a lower one are stale (e.g. failed earlier) and don't count. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each expected column, look for a current (mod_counter
	   matching) component claiming that column */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) member of a
			   mirror pair without bailing.. reset the
			   even_pair_failed flag, and go on to the
			   next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerated missing components: none for RAID 0, at most one
	   for RAID 4/5 (RAID 1 pairs were handled above) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3598
3599 void
3600 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3601 RF_Raid_t *raidPtr)
3602 {
3603 RF_ComponentLabel_t *clabel;
3604 int i;
3605
3606 clabel = ac->clabel;
3607
3608 /* 1. Fill in the common stuff */
3609 config->numRow = clabel->num_rows = 1;
3610 config->numCol = clabel->num_columns;
3611 config->numSpare = 0; /* XXX should this be set here? */
3612 config->sectPerSU = clabel->sectPerSU;
3613 config->SUsPerPU = clabel->SUsPerPU;
3614 config->SUsPerRU = clabel->SUsPerRU;
3615 config->parityConfig = clabel->parityConfig;
3616 /* XXX... */
3617 strcpy(config->diskQueueType,"fifo");
3618 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3619 config->layoutSpecificSize = 0; /* XXX ?? */
3620
3621 while(ac!=NULL) {
3622 /* row/col values will be in range due to the checks
3623 in reasonable_label() */
3624 strcpy(config->devnames[0][ac->clabel->column],
3625 ac->devname);
3626 ac = ac->next;
3627 }
3628
3629 for(i=0;i<RF_MAXDBGV;i++) {
3630 config->debugVars[i][0] = 0;
3631 }
3632 }
3633
3634 int
3635 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3636 {
3637 RF_ComponentLabel_t *clabel;
3638 int column;
3639 int sparecol;
3640
3641 raidPtr->autoconfigure = new_value;
3642
3643 for(column=0; column<raidPtr->numCol; column++) {
3644 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3645 clabel = raidget_component_label(raidPtr, column);
3646 clabel->autoconfigure = new_value;
3647 raidflush_component_label(raidPtr, column);
3648 }
3649 }
3650 for(column = 0; column < raidPtr->numSpare ; column++) {
3651 sparecol = raidPtr->numCol + column;
3652 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3653 clabel = raidget_component_label(raidPtr, sparecol);
3654 clabel->autoconfigure = new_value;
3655 raidflush_component_label(raidPtr, sparecol);
3656 }
3657 }
3658 return(new_value);
3659 }
3660
3661 int
3662 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3663 {
3664 RF_ComponentLabel_t *clabel;
3665 int column;
3666 int sparecol;
3667
3668 raidPtr->root_partition = new_value;
3669 for(column=0; column<raidPtr->numCol; column++) {
3670 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3671 clabel = raidget_component_label(raidPtr, column);
3672 clabel->root_partition = new_value;
3673 raidflush_component_label(raidPtr, column);
3674 }
3675 }
3676 for(column = 0; column < raidPtr->numSpare ; column++) {
3677 sparecol = raidPtr->numCol + column;
3678 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3679 clabel = raidget_component_label(raidPtr, sparecol);
3680 clabel->root_partition = new_value;
3681 raidflush_component_label(raidPtr, sparecol);
3682 }
3683 }
3684 return(new_value);
3685 }
3686
3687 void
3688 rf_release_all_vps(RF_ConfigSet_t *cset)
3689 {
3690 RF_AutoConfig_t *ac;
3691
3692 ac = cset->ac;
3693 while(ac!=NULL) {
3694 /* Close the vp, and give it back */
3695 if (ac->vp) {
3696 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3697 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3698 vput(ac->vp);
3699 ac->vp = NULL;
3700 }
3701 ac = ac->next;
3702 }
3703 }
3704
3705
3706 void
3707 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3708 {
3709 RF_AutoConfig_t *ac;
3710 RF_AutoConfig_t *next_ac;
3711
3712 ac = cset->ac;
3713 while(ac!=NULL) {
3714 next_ac = ac->next;
3715 /* nuke the label */
3716 free(ac->clabel, M_RAIDFRAME);
3717 /* cleanup the config structure */
3718 free(ac, M_RAIDFRAME);
3719 /* "next.." */
3720 ac = next_ac;
3721 }
3722 /* and, finally, nuke the config set */
3723 free(cset, M_RAIDFRAME);
3724 }
3725
3726
/*
 * Initialize a component label from the current state of the RAID
 * set: version, identity, geometry and policy fields.  Per-component
 * fields (row/column) are left to the caller.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3759
/*
 * Configure a RAID set from an autoconfig config set: pick a unit
 * (preferring the one recorded in the component labels), build an
 * RF_Config_t and run rf_Configure().  On success, returns the
 * attached softc and records root-eligibility in cset->rootable;
 * on failure, returns NULL.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* walk forward from the preferred unit to the first unit whose
	   softc is not already a valid (configured) RAID set */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3833
3834 void
3835 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3836 {
3837 struct buf *bp;
3838 struct raid_softc *rs;
3839
3840 bp = (struct buf *)desc->bp;
3841 rs = desc->raidPtr->softc;
3842 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3843 (bp->b_flags & B_READ));
3844 }
3845
/*
 * Initialize a pool(9) and pre-populate it.
 *
 * p      - pool to initialize
 * size   - size of each pool item, in bytes
 * w_chan - wait-channel name for the pool
 * xmin   - low-water mark; also the number of items primed up front
 * xmax   - high-water mark
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3855
3856 /*
3857 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3858 * if there is IO pending and if that IO could possibly be done for a
3859 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3860 * otherwise.
3861 *
3862 */
3863
3864 int
3865 rf_buf_queue_check(RF_Raid_t *raidPtr)
3866 {
3867 struct raid_softc *rs = raidPtr->softc;
3868 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3869 /* there is work to do */
3870 return 0;
3871 }
3872 /* default is nothing to do */
3873 return 1;
3874 }
3875
3876 int
3877 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3878 {
3879 uint64_t numsecs;
3880 unsigned secsize;
3881 int error;
3882
3883 error = getdisksize(vp, &numsecs, &secsize);
3884 if (error == 0) {
3885 diskPtr->blockSize = secsize;
3886 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3887 diskPtr->partitionSize = numsecs;
3888 return 0;
3889 }
3890 return error;
3891 }
3892
/*
 * Autoconfiguration match function: raid(4) pseudo-devices always
 * match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3898
/*
 * Autoconfiguration attach function: intentionally empty; no
 * per-device state is set up at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3904
3905
3906 static int
3907 raid_detach(device_t self, int flags)
3908 {
3909 int error;
3910 struct raid_softc *rs = raidget(device_unit(self));
3911
3912 if (rs == NULL)
3913 return ENXIO;
3914
3915 if ((error = raidlock(rs)) != 0)
3916 return (error);
3917
3918 error = raid_detach_unlocked(rs);
3919
3920 raidunlock(rs);
3921
3922 /* XXXkd: raidput(rs) ??? */
3923
3924 return error;
3925 }
3926
/*
 * Fill in a synthetic disk geometry for the RAID device and push it
 * to the disk(9) layer.  The geometry is fabricated: sectors-per-track
 * is the data portion of a stripe, and the track count is a fixed 4
 * per column.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3941
3942 /*
3943 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3944 * We end up returning whatever error was returned by the first cache flush
3945 * that fails.
3946 */
3947
3948 int
3949 rf_sync_component_caches(RF_Raid_t *raidPtr)
3950 {
3951 int c, sparecol;
3952 int e,error;
3953 int force = 1;
3954
3955 error = 0;
3956 for (c = 0; c < raidPtr->numCol; c++) {
3957 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3958 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3959 &force, FWRITE, NOCRED);
3960 if (e) {
3961 if (e != ENODEV)
3962 printf("raid%d: cache flush to component %s failed.\n",
3963 raidPtr->raidid, raidPtr->Disks[c].devname);
3964 if (error == 0) {
3965 error = e;
3966 }
3967 }
3968 }
3969 }
3970
3971 for( c = 0; c < raidPtr->numSpare ; c++) {
3972 sparecol = raidPtr->numCol + c;
3973 /* Need to ensure that the reconstruct actually completed! */
3974 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3975 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3976 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3977 if (e) {
3978 if (e != ENODEV)
3979 printf("raid%d: cache flush to component %s failed.\n",
3980 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3981 if (error == 0) {
3982 error = e;
3983 }
3984 }
3985 }
3986 }
3987 return error;
3988 }
3989