rf_netbsdkintf.c revision 1.313 1 /* $NetBSD: rf_netbsdkintf.c,v 1.313 2014/10/11 12:01:27 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.313 2014/10/11 12:01:27 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/* Block-device switch: entry points for the block raidN device nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character-device switch: raw (rraidN) device entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the generic disk(9) layer (strategy + minphys). */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/* Per-unit driver state ("softc") for one configured RAID device. */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* RAIDframe engine state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global "raids" list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the raid unit number from a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /*
263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
264 * Be aware that large numbers can allow the driver to consume a lot of
265 * kernel memory, especially on writes, and in degraded mode reads.
266 *
267 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
268 * a single 64K write will typically require 64K for the old data,
269 * 64K for the old parity, and 64K for the new parity, for a total
270 * of 192K (if the parity buffer is not re-used immediately).
271 * Even it if is used immediately, that's still 128K, which when multiplied
272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
273 *
274 * Now in degraded mode, for example, a 64K read on the above setup may
275 * require data reconstruction, which will require *all* of the 4 remaining
276 * disks to participate -- 4 * 32K/disk == 128K again.
277 */
278
279 #ifndef RAIDOUTSTANDING
280 #define RAIDOUTSTANDING 6
281 #endif
282
283 #define RAIDLABELDEV(dev) \
284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
285
286 /* declared here, and made public, for the benefit of KVM stuff.. */
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 if (sc == NULL) {
342 #ifdef DIAGNOSTIC
343 printf("%s: out of memory\n", __func__);
344 #endif
345 return NULL;
346 }
347 sc->sc_unit = unit;
348 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
349 return sc;
350 }
351
352 static void
353 raiddestroy(struct raid_softc *sc) {
354 bufq_free(sc->buf_queue);
355 kmem_free(sc, sizeof(*sc));
356 }
357
358 static struct raid_softc *
359 raidget(int unit) {
360 struct raid_softc *sc;
361 if (unit < 0) {
362 #ifdef DIAGNOSTIC
363 panic("%s: unit %d!", __func__, unit);
364 #endif
365 return NULL;
366 }
367 mutex_enter(&raid_lock);
368 LIST_FOREACH(sc, &raids, sc_link) {
369 if (sc->sc_unit == unit) {
370 mutex_exit(&raid_lock);
371 return sc;
372 }
373 }
374 mutex_exit(&raid_lock);
375 if ((sc = raidcreate(unit)) == NULL)
376 return NULL;
377 mutex_enter(&raid_lock);
378 LIST_INSERT_HEAD(&raids, sc, sc_link);
379 mutex_exit(&raid_lock);
380 return sc;
381 }
382
383 static void
384 raidput(struct raid_softc *sc) {
385 mutex_enter(&raid_lock);
386 LIST_REMOVE(sc, sc_link);
387 mutex_exit(&raid_lock);
388 raiddestroy(sc);
389 }
390
/*
 * raidattach: one-time driver initialization, invoked from the kernel
 * autoconfiguration machinery ("num" is the hinted unit count; unused
 * here since units are created on demand).  Initializes the global
 * softc list lock, boots the RAIDframe engine, attaches the cfattach,
 * and registers a config finalizer so autoconfiguration of RAID sets
 * runs only after all real hardware disks have been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization for spare-table installation requests. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Boot the core RAIDframe engine; failure here is unrecoverable. */
	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
423
424 int
425 rf_autoconfig(device_t self)
426 {
427 RF_AutoConfig_t *ac_list;
428 RF_ConfigSet_t *config_sets;
429
430 if (!raidautoconfig || raidautoconfigdone == true)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfigdone = true;
435
436 #ifdef __HAVE_CPU_BOOTCONF
437 /*
438 * 0. find the boot device if needed first so we can use it later
439 * this needs to be done before we autoconfigure any raid sets,
440 * because if we use wedges we are not going to be able to open
441 * the boot device later
442 */
443 if (booted_device == NULL)
444 cpu_bootconf();
445 #endif
446 /* 1. locate all RAID components on the system */
447 aprint_debug("Searching for RAID components...\n");
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set and configure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return 1;
460 }
461
462 static int
463 rf_containsboot(RF_Raid_t *r, device_t bdv) {
464 const char *bootname = device_xname(bdv);
465 size_t len = strlen(bootname);
466
467 for (int col = 0; col < r->numCol; col++) {
468 const char *devname = r->Disks[col].devname;
469 devname += sizeof("/dev/") - 1;
470 if (strncmp(devname, "dk", 2) == 0) {
471 const char *parent =
472 dkwedge_get_parent_name(r->Disks[col].dev);
473 if (parent != NULL)
474 devname = parent;
475 }
476 if (strncmp(devname, bootname, len) == 0) {
477 struct raid_softc *sc = r->softc;
478 aprint_debug("raid%d includes boot device %s\n",
479 sc->sc_unit, devname);
480 return 1;
481 }
482 }
483 return 0;
484 }
485
/*
 * Walk the list of discovered config sets, autoconfigure the eligible
 * ones, and -- unless the user hardwired a root device -- try to point
 * booted_device at a RAID set that should be the root filesystem.
 * Consumes (frees) every config set on the list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of root-eligible configured sets */
	struct raid_softc *sc, *rsc;	/* rsc: last root candidate seen */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* Save the link now; cset is destroyed below. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		/* Exactly one candidate: take it (as a wedge if it has any). */
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* Override root if nothing else is booted, if the set is
		   marked force-root, or if it contains the boot device. */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Narrow the candidates to sets containing the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
596
597
598 int
599 raidsize(dev_t dev)
600 {
601 struct raid_softc *rs;
602 struct disklabel *lp;
603 int part, unit, omask, size;
604
605 unit = raidunit(dev);
606 if ((rs = raidget(unit)) == NULL)
607 return -1;
608 if ((rs->sc_flags & RAIDF_INITED) == 0)
609 return (-1);
610
611 part = DISKPART(dev);
612 omask = rs->sc_dkdev.dk_openmask & (1 << part);
613 lp = rs->sc_dkdev.dk_label;
614
615 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
616 return (-1);
617
618 if (lp->d_partitions[part].p_fstype != FS_SWAP)
619 size = -1;
620 else
621 size = lp->d_partitions[part].p_size *
622 (lp->d_secsize / DEV_BSIZE);
623
624 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
625 return (-1);
626
627 return (size);
628
629 }
630
631 int
632 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
633 {
634 int unit = raidunit(dev);
635 struct raid_softc *rs;
636 const struct bdevsw *bdev;
637 struct disklabel *lp;
638 RF_Raid_t *raidPtr;
639 daddr_t offset;
640 int part, c, sparecol, j, scol, dumpto;
641 int error = 0;
642
643 if ((rs = raidget(unit)) == NULL)
644 return ENXIO;
645
646 raidPtr = &rs->sc_r;
647
648 if ((rs->sc_flags & RAIDF_INITED) == 0)
649 return ENXIO;
650
651 /* we only support dumping to RAID 1 sets */
652 if (raidPtr->Layout.numDataCol != 1 ||
653 raidPtr->Layout.numParityCol != 1)
654 return EINVAL;
655
656
657 if ((error = raidlock(rs)) != 0)
658 return error;
659
660 if (size % DEV_BSIZE != 0) {
661 error = EINVAL;
662 goto out;
663 }
664
665 if (blkno + size / DEV_BSIZE > rs->sc_size) {
666 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
667 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
668 size / DEV_BSIZE, rs->sc_size);
669 error = EINVAL;
670 goto out;
671 }
672
673 part = DISKPART(dev);
674 lp = rs->sc_dkdev.dk_label;
675 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
676
677 /* figure out what device is alive.. */
678
679 /*
680 Look for a component to dump to. The preference for the
681 component to dump to is as follows:
682 1) the master
683 2) a used_spare of the master
684 3) the slave
685 4) a used_spare of the slave
686 */
687
688 dumpto = -1;
689 for (c = 0; c < raidPtr->numCol; c++) {
690 if (raidPtr->Disks[c].status == rf_ds_optimal) {
691 /* this might be the one */
692 dumpto = c;
693 break;
694 }
695 }
696
697 /*
698 At this point we have possibly selected a live master or a
699 live slave. We now check to see if there is a spared
700 master (or a spared slave), if we didn't find a live master
701 or a live slave.
702 */
703
704 for (c = 0; c < raidPtr->numSpare; c++) {
705 sparecol = raidPtr->numCol + c;
706 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
707 /* How about this one? */
708 scol = -1;
709 for(j=0;j<raidPtr->numCol;j++) {
710 if (raidPtr->Disks[j].spareCol == sparecol) {
711 scol = j;
712 break;
713 }
714 }
715 if (scol == 0) {
716 /*
717 We must have found a spared master!
718 We'll take that over anything else
719 found so far. (We couldn't have
720 found a real master before, since
721 this is a used spare, and it's
722 saying that it's replacing the
723 master.) On reboot (with
724 autoconfiguration turned on)
725 sparecol will become the 1st
726 component (component0) of this set.
727 */
728 dumpto = sparecol;
729 break;
730 } else if (scol != -1) {
731 /*
732 Must be a spared slave. We'll dump
733 to that if we havn't found anything
734 else so far.
735 */
736 if (dumpto == -1)
737 dumpto = sparecol;
738 }
739 }
740 }
741
742 if (dumpto == -1) {
743 /* we couldn't find any live components to dump to!?!?
744 */
745 error = EINVAL;
746 goto out;
747 }
748
749 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
750
751 /*
752 Note that blkno is relative to this particular partition.
753 By adding the offset of this partition in the RAID
754 set, and also adding RF_PROTECTED_SECTORS, we get a
755 value that is relative to the partition used for the
756 underlying component.
757 */
758
759 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
760 blkno + offset, va, size);
761
762 out:
763 raidunlock(rs);
764
765 return error;
766 }
767 /* ARGSUSED */
/*
 * Open a raid device partition.  Takes the unit lock for the
 * duration; refuses to open units being shut down, and (except for
 * RAW_PART) partitions of uninitialized or wedge-covered units.
 * On the first open of an initialized unit, all component labels are
 * marked dirty so a crash is detectable.  Note that the success path
 * also exits through the "bad:" label with error == 0.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* A unit in the middle of unconfiguration cannot be opened. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* Very first open of this unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	/* Recompute the combined open mask from the char/block masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
851 /* ARGSUSED */
/*
 * Close a raid device partition.  Clears the partition's bit in the
 * relevant open mask under the unit lock; when the last partition of
 * an initialized unit closes, the component labels are updated to
 * record a clean final state.  Always returns 0 once the lock has
 * been obtained.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
900
/*
 * Block-I/O strategy routine.  Validates the request and bounds-checks
 * it (against the raw media size for RAW_PART, against the disklabel
 * otherwise), then queues the buf on the unit's bufq and wakes the
 * RAIDframe I/O thread.  Failed requests are completed immediately via
 * biodone() with b_error set.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length I/O completes trivially with no error. */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	/* Writing the label area is allowed while WLABEL/LABELLING is set. */
	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sector size) to DEV_BSIZE
		   blocks, shifting whichever direction is needed. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
971 /* ARGSUSED */
972 int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987 /* ARGSUSED */
988 int
989 raidwrite(dev_t dev, struct uio *uio, int flags)
990 {
991 int unit = raidunit(dev);
992 struct raid_softc *rs;
993
994 if ((rs = raidget(unit)) == NULL)
995 return ENXIO;
996
997 if ((rs->sc_flags & RAIDF_INITED) == 0)
998 return (ENXIO);
999
1000 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1001
1002 }
1003
/*
 * Detach a raid unit; the caller must already hold the unit lock.
 * Fails with EBUSY while any partition is open.  An initialized unit
 * is shut down through the RAIDframe engine first; then the disk(9)
 * attachment is torn down (wedges deleted, disk detached/destroyed).
 * Returns 0 on success or an errno.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1035
/*
 * raidioctl: ioctl entry point for the raid pseudo-device.
 *
 * Handles both RAIDframe-specific commands (RAIDFRAME_*) and the
 * standard disk ioctls (DIOC*).  `data' points at a kernel copy of the
 * ioctl argument; for several RAIDFRAME commands it holds a *user-space
 * pointer* that must still be copyin()/copyout()'d explicitly.
 *
 * Returns 0 on success or an errno value.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * Refuse the shutdown if any partition other than the one
		 * the ioctl arrived on is still open, or if this partition
		 * is open both block and character.
		 */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* cf was allocated in raidinit(); reclaim it on detach. */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
		/* NOTE(review): clabel is not checked for NULL here, unlike
		 * the k_cfg allocation above -- confirm RF_Malloc cannot
		 * fail in this configuration. */

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		/* Hand back the in-core label for that column. */
		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; always returns 0 (retcode unchanged). */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the target component under the raid mutex. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The request is handed to the recon thread, which frees it. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] immediately after the data columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		 * here -- every other RF_CREATE_THREAD case returns retcode.
		 * Confirm whether returning 0 unconditionally is intended. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): rf_broadcast_conf2 looks like a typo for
		 * rf_broadcast_cond2 (cf. the SEND_SPARET case below); this
		 * is dead code under #if 0, so it never compiles. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* Only the WDINFO variants write the label to disk. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* splbio() protects the buffer queue from interrupt-time
		 * strategy swaps while we read its name. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue, migrating any pending buffers. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1967
1968
1969 /* raidinit -- complete the rest of the initialization for the
1970 RAIDframe device. */
1971
1972
/*
 * raidinit: complete the rest of the initialization for the RAIDframe
 * device after a successful rf_Configure().
 *
 * Attaches a pseudo-device instance for the set, initializes and
 * attaches the disk structure (which also allocates the disklabel
 * storage), discovers wedges, and records the geometry.  On
 * config_attach_pseudo() failure the RAIDF_INITED flag is cleared
 * again and the cfdata is freed.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	/* This cfdata is freed again in the RAIDFRAME_SHUTDOWN ioctl
	 * path (and below on attach failure). */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Scan for wedges now that the disk is visible. */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
2024 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2025 /* wake up the daemon & tell it to get us a spare table
2026 * XXX
2027 * the entries in the queues should be tagged with the raidPtr
2028 * so that in the extremely rare case that two recons happen at once,
2029 * we know for which device were requesting a spare table
2030 * XXX
2031 *
2032 * XXX This code is not currently used. GO
2033 */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * user-space sparetable daemon and wait for its response.
 *
 * Enqueues `req' on rf_sparet_wait_queue, wakes any daemon blocked in
 * RAIDFRAME_SPARET_WAIT, then sleeps on rf_sparet_resp_cv until a
 * response appears on rf_sparet_resp_queue.  Both queues are protected
 * by rf_sparet_wait_mutex.
 *
 * Returns the daemon's status code (the response's fcol field).  Note
 * that the response freed here is a different allocation from the
 * request passed in.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* (rf_wait_cond2 drops and re-takes the mutex while asleep.) */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* `req' is reused here to point at the dequeued response. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2057 #endif
2058
2059 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2060 * bp & passes it down.
2061 * any calls originating in the kernel must use non-blocking I/O
2062 * do some extra sanity checking to return "appropriate" error values for
2063 * certain conditions (to make some standard utilities work)
2064 *
2065 * Formerly known as: rf_DoAccessKernel
2066 */
/*
 * raidstart: drain queued buffers into RAIDframe.
 *
 * Pulls buffers off rs->buf_queue while openings remain, converts each
 * partition-relative b_blkno into a RAID address, validates the range,
 * and submits the access to rf_DoAccess() as non-blocking async I/O.
 * Buffers that fail validation (past end of set, or not a multiple of
 * the sector size) are completed immediately with an error.
 *
 * Locking: raidPtr->mutex is held at each loop-condition check and
 * released while the buffer is processed; every continue path must
 * re-take it before looping.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* label update must run unlocked; drop and re-take. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NOTE: returns with the mutex released. */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* DEV_BSIZE blocks -> RAID sectors. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* The sum < ... comparisons catch wraparound as well as
		 * simple past-the-end requests. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
			/* NOTE(review): openings was decremented above but is
			 * not restored on this failure path -- presumably the
			 * completion path accounts for it; verify. */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2184
2185
2186
2187
2188 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2189
/*
 * Dispatch one queued request (NOP, READ or WRITE) to the underlying
 * component device.  Called with the disk queue mutex held; the mutex
 * is dropped only around bdev_strategy() and retaken before return.
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* note: the extra parens around the string are harmless */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* fake a completed I/O so KernelWakeupFunc can find req */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up bp to target the component's vnode/device at the
		 * requested sector range; completion calls KernelWakeupFunc */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/*
 * Biodone callback for component I/O issued by rf_DispatchKernelIO().
 * Runs in interrupt/biodone context: records the error, possibly marks
 * the component failed, and hands the request to the raidio thread via
 * the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/the NOP case */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2331
2332
2333 /*
2334 * initialize a buf structure for doing an I/O in the kernel.
2335 */
2336 static void
2337 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2338 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2339 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2340 struct proc *b_proc)
2341 {
2342 /* bp->b_flags = B_PHYS | rw_flag; */
2343 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2344 bp->b_oflags = 0;
2345 bp->b_cflags = 0;
2346 bp->b_bcount = numSect << logBytesPerSector;
2347 bp->b_bufsize = bp->b_bcount;
2348 bp->b_error = 0;
2349 bp->b_dev = dev;
2350 bp->b_data = bf;
2351 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2352 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2353 if (bp->b_bcount == 0) {
2354 panic("bp->b_bcount is zero in InitBP!!");
2355 }
2356 bp->b_proc = b_proc;
2357 bp->b_iodone = cbFunc;
2358 bp->b_private = cbArg;
2359 }
2360
2361 static void
2362 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2363 struct disklabel *lp)
2364 {
2365 memset(lp, 0, sizeof(*lp));
2366
2367 /* fabricate a label... */
2368 if (raidPtr->totalSectors > UINT32_MAX)
2369 lp->d_secperunit = UINT32_MAX;
2370 else
2371 lp->d_secperunit = raidPtr->totalSectors;
2372 lp->d_secsize = raidPtr->bytesPerSector;
2373 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2374 lp->d_ntracks = 4 * raidPtr->numCol;
2375 lp->d_ncylinders = raidPtr->totalSectors /
2376 (lp->d_nsectors * lp->d_ntracks);
2377 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2378
2379 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2380 lp->d_type = DTYPE_RAID;
2381 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2382 lp->d_rpm = 3600;
2383 lp->d_interleave = 1;
2384 lp->d_flags = 0;
2385
2386 lp->d_partitions[RAW_PART].p_offset = 0;
2387 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2388 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2389 lp->d_npartitions = RAW_PART + 1;
2390
2391 lp->d_magic = DISKMAGIC;
2392 lp->d_magic2 = DISKMAGIC;
2393 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2394
2395 }
2396 /*
2397 * Read the disklabel from the raid device. If one is not present, fake one
2398 * up.
2399 */
2400 static void
2401 raidgetdisklabel(dev_t dev)
2402 {
2403 int unit = raidunit(dev);
2404 struct raid_softc *rs;
2405 const char *errstring;
2406 struct disklabel *lp;
2407 struct cpu_disklabel *clp;
2408 RF_Raid_t *raidPtr;
2409
2410 if ((rs = raidget(unit)) == NULL)
2411 return;
2412
2413 lp = rs->sc_dkdev.dk_label;
2414 clp = rs->sc_dkdev.dk_cpulabel;
2415
2416 db1_printf(("Getting the disklabel...\n"));
2417
2418 memset(clp, 0, sizeof(*clp));
2419
2420 raidPtr = &rs->sc_r;
2421
2422 raidgetdefaultlabel(raidPtr, rs, lp);
2423
2424 /*
2425 * Call the generic disklabel extraction routine.
2426 */
2427 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2428 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2429 if (errstring)
2430 raidmakedisklabel(rs);
2431 else {
2432 int i;
2433 struct partition *pp;
2434
2435 /*
2436 * Sanity check whether the found disklabel is valid.
2437 *
2438 * This is necessary since total size of the raid device
2439 * may vary when an interleave is changed even though exactly
2440 * same components are used, and old disklabel may used
2441 * if that is found.
2442 */
2443 if (lp->d_secperunit != rs->sc_size)
2444 printf("raid%d: WARNING: %s: "
2445 "total sector size in disklabel (%" PRIu32 ") != "
2446 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2447 lp->d_secperunit, rs->sc_size);
2448 for (i = 0; i < lp->d_npartitions; i++) {
2449 pp = &lp->d_partitions[i];
2450 if (pp->p_offset + pp->p_size > rs->sc_size)
2451 printf("raid%d: WARNING: %s: end of partition `%c' "
2452 "exceeds the size of raid (%" PRIu64 ")\n",
2453 unit, rs->sc_xname, 'a' + i, rs->sc_size);
2454 }
2455 }
2456
2457 }
2458 /*
2459 * Take care of things one might want to take care of in the event
2460 * that a disklabel isn't present.
2461 */
2462 static void
2463 raidmakedisklabel(struct raid_softc *rs)
2464 {
2465 struct disklabel *lp = rs->sc_dkdev.dk_label;
2466 db1_printf(("Making a label..\n"));
2467
2468 /*
2469 * For historical reasons, if there's no disklabel present
2470 * the raw partition must be marked FS_BSDFFS.
2471 */
2472
2473 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2474
2475 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2476
2477 lp->d_checksum = dkcksum(lp);
2478 }
2479 /*
2480 * Wait interruptibly for an exclusive lock.
2481 *
2482 * XXX
2483 * Several drivers do this; it should be abstracted and made MP-safe.
2484 * (Hmm... where have we seen this warning before :-> GO )
2485 */
2486 static int
2487 raidlock(struct raid_softc *rs)
2488 {
2489 int error;
2490
2491 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2492 rs->sc_flags |= RAIDF_WANTED;
2493 if ((error =
2494 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2495 return (error);
2496 }
2497 rs->sc_flags |= RAIDF_LOCKED;
2498 return (0);
2499 }
2500 /*
2501 * Unlock and wake up any waiters.
2502 */
2503 static void
2504 raidunlock(struct raid_softc *rs)
2505 {
2506
2507 rs->sc_flags &= ~RAIDF_LOCKED;
2508 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2509 rs->sc_flags &= ~RAIDF_WANTED;
2510 wakeup(rs);
2511 }
2512 }
2513
2514
2515 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2516 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2517 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2518
/*
 * Byte offset of the component label area on each component
 * (currently fixed at RF_COMPONENT_INFO_OFFSET).
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2525
2526 static daddr_t
2527 rf_component_info_size(unsigned secsize)
2528 {
2529 daddr_t info_size;
2530
2531 KASSERT(secsize);
2532 if (secsize > RF_COMPONENT_INFO_SIZE)
2533 info_size = secsize;
2534 else
2535 info_size = RF_COMPONENT_INFO_SIZE;
2536
2537 return info_size;
2538 }
2539
2540 static daddr_t
2541 rf_parity_map_offset(RF_Raid_t *raidPtr)
2542 {
2543 daddr_t map_offset;
2544
2545 KASSERT(raidPtr->bytesPerSector);
2546 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2547 map_offset = raidPtr->bytesPerSector;
2548 else
2549 map_offset = RF_COMPONENT_INFO_SIZE;
2550 map_offset += rf_component_info_offset();
2551
2552 return map_offset;
2553 }
2554
2555 static daddr_t
2556 rf_parity_map_size(RF_Raid_t *raidPtr)
2557 {
2558 daddr_t map_size;
2559
2560 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2561 map_size = raidPtr->bytesPerSector;
2562 else
2563 map_size = RF_PARITY_MAP_SIZE;
2564
2565 return map_size;
2566 }
2567
2568 int
2569 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2570 {
2571 RF_ComponentLabel_t *clabel;
2572
2573 clabel = raidget_component_label(raidPtr, col);
2574 clabel->clean = RF_RAID_CLEAN;
2575 raidflush_component_label(raidPtr, col);
2576 return(0);
2577 }
2578
2579
2580 int
2581 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2582 {
2583 RF_ComponentLabel_t *clabel;
2584
2585 clabel = raidget_component_label(raidPtr, col);
2586 clabel->clean = RF_RAID_DIRTY;
2587 raidflush_component_label(raidPtr, col);
2588 return(0);
2589 }
2590
/*
 * Read the on-disk component label of column `col' into the in-core
 * copy (raid_cinfo[col].ci_label).  Returns 0 or an errno.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2600
/*
 * Return a pointer to the in-core component label for column `col'.
 * The caller may modify it and then push it out via
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2606
/*
 * Write the in-core component label of column `col' back to the
 * component, stamping it with the set's current mod_counter first.
 * Returns 0 or an errno from the write.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity map's idea of the modcount in sync */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2621
2622
/*
 * Read a component label from `dev'/`b_vp' into `clabel'.  Thin wrapper
 * around raidread_component_area() using the standard label offset and
 * size for the given sector size.  Returns 0 or an errno.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2632
2633 /* ARGSUSED */
2634 static int
2635 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2636 size_t msize, daddr_t offset, daddr_t dsize)
2637 {
2638 struct buf *bp;
2639 const struct bdevsw *bdev;
2640 int error;
2641
2642 /* XXX should probably ensure that we don't try to do this if
2643 someone has changed rf_protected_sectors. */
2644
2645 if (b_vp == NULL) {
2646 /* For whatever reason, this component is not valid.
2647 Don't try to read a component label from it. */
2648 return(EINVAL);
2649 }
2650
2651 /* get a block of the appropriate size... */
2652 bp = geteblk((int)dsize);
2653 bp->b_dev = dev;
2654
2655 /* get our ducks in a row for the read */
2656 bp->b_blkno = offset / DEV_BSIZE;
2657 bp->b_bcount = dsize;
2658 bp->b_flags |= B_READ;
2659 bp->b_resid = dsize;
2660
2661 bdev = bdevsw_lookup(bp->b_dev);
2662 if (bdev == NULL)
2663 return (ENXIO);
2664 (*bdev->d_strategy)(bp);
2665
2666 error = biowait(bp);
2667
2668 if (!error) {
2669 memcpy(data, bp->b_data, msize);
2670 }
2671
2672 brelse(bp, 0);
2673 return(error);
2674 }
2675
2676
/*
 * Write `clabel' to the component label area of `dev'/`b_vp'.  Thin
 * wrapper around raidwrite_component_area() using the standard label
 * offset and size; always synchronous.  Returns 0 or an errno.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2686
2687 /* ARGSUSED */
2688 static int
2689 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2690 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2691 {
2692 struct buf *bp;
2693 const struct bdevsw *bdev;
2694 int error;
2695
2696 /* get a block of the appropriate size... */
2697 bp = geteblk((int)dsize);
2698 bp->b_dev = dev;
2699
2700 /* get our ducks in a row for the write */
2701 bp->b_blkno = offset / DEV_BSIZE;
2702 bp->b_bcount = dsize;
2703 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2704 bp->b_resid = dsize;
2705
2706 memset(bp->b_data, 0, dsize);
2707 memcpy(bp->b_data, data, msize);
2708
2709 bdev = bdevsw_lookup(bp->b_dev);
2710 if (bdev == NULL)
2711 return (ENXIO);
2712 (*bdev->d_strategy)(bp);
2713 if (asyncp)
2714 return 0;
2715 error = biowait(bp);
2716 brelse(bp, 0);
2717 if (error) {
2718 #if 1
2719 printf("Failed to write RAID component info!\n");
2720 #endif
2721 }
2722
2723 return(error);
2724 }
2725
/*
 * Write the on-disk parity map `map' to every live component of the
 * set.  Dead components are skipped; write errors are currently
 * ignored (see XXX below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2743
/*
 * Read the parity map from every live component and merge them into
 * `*map' (union of dirty regions), so that a region dirty on any
 * component is treated as dirty.
 *
 * NOTE(review): if every component is dead, `*map' is left untouched;
 * read errors are also ignored and the stale `tmp' would be merged --
 * confirm callers tolerate this.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* first live component seeds the result */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2768
/*
 * Bump the set's mod_counter and mark all live components (and
 * in-use spares) dirty on disk.  Called when the set goes "in use"
 * so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			/* NOTE(review): scol is not reset per spare; if no
			   match is found it retains the previous spare's
			   value (or -1) -- confirm this cannot happen */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2828
2829
/*
 * Push updated component labels to all optimal components and in-use
 * spares.  If `final' is RF_FINAL_COMPONENT_UPDATE and parity is known
 * good, the components are additionally marked clean (this is the
 * normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			/* NOTE(review): scol is not reset per spare; a spare
			   with no matching column inherits the previous
			   iteration's value -- confirm this cannot happen */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2904
2905 void
2906 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2907 {
2908
2909 if (vp != NULL) {
2910 if (auto_configured == 1) {
2911 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2912 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2913 vput(vp);
2914
2915 } else {
2916 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2917 }
2918 }
2919 }
2920
2921
2922 void
2923 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2924 {
2925 int r,c;
2926 struct vnode *vp;
2927 int acd;
2928
2929
2930 /* We take this opportunity to close the vnodes like we should.. */
2931
2932 for (c = 0; c < raidPtr->numCol; c++) {
2933 vp = raidPtr->raid_cinfo[c].ci_vp;
2934 acd = raidPtr->Disks[c].auto_configured;
2935 rf_close_component(raidPtr, vp, acd);
2936 raidPtr->raid_cinfo[c].ci_vp = NULL;
2937 raidPtr->Disks[c].auto_configured = 0;
2938 }
2939
2940 for (r = 0; r < raidPtr->numSpare; r++) {
2941 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2942 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2943 rf_close_component(raidPtr, vp, acd);
2944 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2945 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2946 }
2947 }
2948
2949
/*
 * Kernel thread body: fail the requested component (optionally kicking
 * off reconstruction), then exit.  `req' is consumed (freed) here.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* this thread owns req; release it before exiting */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2971
/*
 * Kernel thread body: rewrite all parity on the set.  On success the
 * set is marked parity-clean; anyone blocked in shutdown waiting for
 * the rewrite is woken before the thread exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3002
3003
/*
 * Kernel thread body: copy reconstructed data from a spare back to a
 * replaced component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3018
3019
/*
 * Kernel thread body: reconstruct a component in place (onto the same
 * device), then exit.  `req' is consumed (freed) here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* this thread owns req; release it before exiting */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3037
/*
 * Probe one candidate component (`dev'/`vp') for a RAIDframe label.
 * If a reasonable label is found, prepend an RF_AutoConfig_t entry to
 * `ac_list' and keep the vnode open; otherwise close the vnode.
 * Returns the (possibly updated) list head, or NULL if memory ran out
 * (in which case the entire list is freed).
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		/* NOTE(review): vp is not closed on this path, unlike the
		   !good_one path below -- verify whether that leaks a
		   reference */
		    while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3095
/*
 * Scan every disk-class device in the system for RAIDframe components:
 * wedges with a RAIDframe partition type, disklabel partitions of type
 * FS_RAID, and (failing both) the raw partition itself.  Each candidate
 * is probed via rf_get_component(); returns the accumulated list of
 * auto-configuration candidates (possibly empty).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their RAIDframe-ness in the wedge
			   partition type, not in a disklabel */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3277
3278
3279 int
3280 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3281 {
3282
3283 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3284 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3285 ((clabel->clean == RF_RAID_CLEAN) ||
3286 (clabel->clean == RF_RAID_DIRTY)) &&
3287 clabel->row >=0 &&
3288 clabel->column >= 0 &&
3289 clabel->num_rows > 0 &&
3290 clabel->num_columns > 0 &&
3291 clabel->row < clabel->num_rows &&
3292 clabel->column < clabel->num_columns &&
3293 clabel->blockSize > 0 &&
3294 /*
3295 * numBlocksHi may contain garbage, but it is ok since
3296 * the type is unsigned. If it is really garbage,
3297 * rf_fix_old_label_size() will fix it.
3298 */
3299 rf_component_label_numblocks(clabel) > 0) {
3300 /*
3301 * label looks reasonable enough...
3302 * let's make sure it has no old garbage.
3303 */
3304 if (numsecs)
3305 rf_fix_old_label_size(clabel, numsecs);
3306 return(1);
3307 }
3308 return(0);
3309 }
3310
3311
3312 /*
3313 * For reasons yet unknown, some old component labels have garbage in
3314 * the newer numBlocksHi region, and this causes lossage. Since those
3315 * disks will also have numsecs set to less than 32 bits of sectors,
3316 * we can determine when this corruption has occurred, and fix it.
3317 *
3318 * The exact same problem, with the same unknown reason, happens to
3319 * the partitionSizeHi member as well.
3320 */
3321 static void
3322 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3323 {
3324
3325 if (numsecs < ((uint64_t)1 << 32)) {
3326 if (clabel->numBlocksHi) {
3327 printf("WARNING: total sectors < 32 bits, yet "
3328 "numBlocksHi set\n"
3329 "WARNING: resetting numBlocksHi to zero.\n");
3330 clabel->numBlocksHi = 0;
3331 }
3332
3333 if (clabel->partitionSizeHi) {
3334 printf("WARNING: total sectors < 32 bits, yet "
3335 "partitionSizeHi set\n"
3336 "WARNING: resetting partitionSizeHi to zero.\n");
3337 clabel->partitionSizeHi = 0;
3338 }
3339 }
3340 }
3341
3342
3343 #ifdef DEBUG
/*
 * Dump the contents of a component label to the console for
 * debugging.  (Only compiled in under DEBUG.)
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* names for root_partition values 0..2; index is masked with
	   3 below, so any out-of-range value prints "*invalid*" */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
3375 #endif
3376
3377 RF_ConfigSet_t *
3378 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3379 {
3380 RF_AutoConfig_t *ac;
3381 RF_ConfigSet_t *config_sets;
3382 RF_ConfigSet_t *cset;
3383 RF_AutoConfig_t *ac_next;
3384
3385
3386 config_sets = NULL;
3387
3388 /* Go through the AutoConfig list, and figure out which components
3389 belong to what sets. */
3390 ac = ac_list;
3391 while(ac!=NULL) {
3392 /* we're going to putz with ac->next, so save it here
3393 for use at the end of the loop */
3394 ac_next = ac->next;
3395
3396 if (config_sets == NULL) {
3397 /* will need at least this one... */
3398 config_sets = (RF_ConfigSet_t *)
3399 malloc(sizeof(RF_ConfigSet_t),
3400 M_RAIDFRAME, M_NOWAIT);
3401 if (config_sets == NULL) {
3402 panic("rf_create_auto_sets: No memory!");
3403 }
3404 /* this one is easy :) */
3405 config_sets->ac = ac;
3406 config_sets->next = NULL;
3407 config_sets->rootable = 0;
3408 ac->next = NULL;
3409 } else {
3410 /* which set does this component fit into? */
3411 cset = config_sets;
3412 while(cset!=NULL) {
3413 if (rf_does_it_fit(cset, ac)) {
3414 /* looks like it matches... */
3415 ac->next = cset->ac;
3416 cset->ac = ac;
3417 break;
3418 }
3419 cset = cset->next;
3420 }
3421 if (cset==NULL) {
3422 /* didn't find a match above... new set..*/
3423 cset = (RF_ConfigSet_t *)
3424 malloc(sizeof(RF_ConfigSet_t),
3425 M_RAIDFRAME, M_NOWAIT);
3426 if (cset == NULL) {
3427 panic("rf_create_auto_sets: No memory!");
3428 }
3429 cset->ac = ac;
3430 ac->next = NULL;
3431 cset->next = config_sets;
3432 cset->rootable = 0;
3433 config_sets = cset;
3434 }
3435 }
3436 ac = ac_next;
3437 }
3438
3439
3440 return(config_sets);
3441 }
3442
3443 static int
3444 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3445 {
3446 RF_ComponentLabel_t *clabel1, *clabel2;
3447
3448 /* If this one matches the *first* one in the set, that's good
3449 enough, since the other members of the set would have been
3450 through here too... */
3451 /* note that we are not checking partitionSize here..
3452
3453 Note that we are also not checking the mod_counters here.
3454 If everything else matches except the mod_counter, that's
3455 good enough for this test. We will deal with the mod_counters
3456 a little later in the autoconfiguration process.
3457
3458 (clabel1->mod_counter == clabel2->mod_counter) &&
3459
3460 The reason we don't check for this is that failed disks
3461 will have lower modification counts. If those disks are
3462 not added to the set they used to belong to, then they will
3463 form their own set, which may result in 2 different sets,
3464 for example, competing to be configured at raid0, and
3465 perhaps competing to be the root filesystem set. If the
3466 wrong ones get configured, or both attempt to become /,
3467 weird behaviour and or serious lossage will occur. Thus we
3468 need to bring them into the fold here, and kick them out at
3469 a later point.
3470
3471 */
3472
3473 clabel1 = cset->ac->clabel;
3474 clabel2 = ac->clabel;
3475 if ((clabel1->version == clabel2->version) &&
3476 (clabel1->serial_number == clabel2->serial_number) &&
3477 (clabel1->num_rows == clabel2->num_rows) &&
3478 (clabel1->num_columns == clabel2->num_columns) &&
3479 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3480 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3481 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3482 (clabel1->parityConfig == clabel2->parityConfig) &&
3483 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3484 (clabel1->blockSize == clabel2->blockSize) &&
3485 rf_component_label_numblocks(clabel1) ==
3486 rf_component_label_numblocks(clabel2) &&
3487 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3488 (clabel1->root_partition == clabel2->root_partition) &&
3489 (clabel1->last_unit == clabel2->last_unit) &&
3490 (clabel1->config_order == clabel2->config_order)) {
3491 /* if it get's here, it almost *has* to be a match */
3492 } else {
3493 /* it's not consistent with somebody in the set..
3494 punt */
3495 return(0);
3496 }
3497 /* all was fine.. it must fit... */
3498 return(1);
3499 }
3500
/*
 * Decide whether a config set has enough live components (those whose
 * mod_counter matches the set's highest mod_counter) to be configured.
 * Returns 1 if the set is usable, 0 if too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum found on any member.  Components with a lower
	   counter are stale (e.g. previously failed). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component at that column with the
	   up-to-date mod_counter. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) member of a
			   RAID 1 pair without bailing, so the pair has
			   at least one live component.  Reset the
			   even_pair_failed flag and go on to the next
			   pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Per-level tolerance: RAID 0 survives no missing components,
	   RAID 4/5 survive at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3603
3604 void
3605 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3606 RF_Raid_t *raidPtr)
3607 {
3608 RF_ComponentLabel_t *clabel;
3609 int i;
3610
3611 clabel = ac->clabel;
3612
3613 /* 1. Fill in the common stuff */
3614 config->numRow = clabel->num_rows = 1;
3615 config->numCol = clabel->num_columns;
3616 config->numSpare = 0; /* XXX should this be set here? */
3617 config->sectPerSU = clabel->sectPerSU;
3618 config->SUsPerPU = clabel->SUsPerPU;
3619 config->SUsPerRU = clabel->SUsPerRU;
3620 config->parityConfig = clabel->parityConfig;
3621 /* XXX... */
3622 strcpy(config->diskQueueType,"fifo");
3623 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3624 config->layoutSpecificSize = 0; /* XXX ?? */
3625
3626 while(ac!=NULL) {
3627 /* row/col values will be in range due to the checks
3628 in reasonable_label() */
3629 strcpy(config->devnames[0][ac->clabel->column],
3630 ac->devname);
3631 ac = ac->next;
3632 }
3633
3634 for(i=0;i<RF_MAXDBGV;i++) {
3635 config->debugVars[i][0] = 0;
3636 }
3637 }
3638
3639 int
3640 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3641 {
3642 RF_ComponentLabel_t *clabel;
3643 int column;
3644 int sparecol;
3645
3646 raidPtr->autoconfigure = new_value;
3647
3648 for(column=0; column<raidPtr->numCol; column++) {
3649 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3650 clabel = raidget_component_label(raidPtr, column);
3651 clabel->autoconfigure = new_value;
3652 raidflush_component_label(raidPtr, column);
3653 }
3654 }
3655 for(column = 0; column < raidPtr->numSpare ; column++) {
3656 sparecol = raidPtr->numCol + column;
3657 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3658 clabel = raidget_component_label(raidPtr, sparecol);
3659 clabel->autoconfigure = new_value;
3660 raidflush_component_label(raidPtr, sparecol);
3661 }
3662 }
3663 return(new_value);
3664 }
3665
3666 int
3667 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3668 {
3669 RF_ComponentLabel_t *clabel;
3670 int column;
3671 int sparecol;
3672
3673 raidPtr->root_partition = new_value;
3674 for(column=0; column<raidPtr->numCol; column++) {
3675 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3676 clabel = raidget_component_label(raidPtr, column);
3677 clabel->root_partition = new_value;
3678 raidflush_component_label(raidPtr, column);
3679 }
3680 }
3681 for(column = 0; column < raidPtr->numSpare ; column++) {
3682 sparecol = raidPtr->numCol + column;
3683 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3684 clabel = raidget_component_label(raidPtr, sparecol);
3685 clabel->root_partition = new_value;
3686 raidflush_component_label(raidPtr, sparecol);
3687 }
3688 }
3689 return(new_value);
3690 }
3691
3692 void
3693 rf_release_all_vps(RF_ConfigSet_t *cset)
3694 {
3695 RF_AutoConfig_t *ac;
3696
3697 ac = cset->ac;
3698 while(ac!=NULL) {
3699 /* Close the vp, and give it back */
3700 if (ac->vp) {
3701 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3702 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3703 vput(ac->vp);
3704 ac->vp = NULL;
3705 }
3706 ac = ac->next;
3707 }
3708 }
3709
3710
3711 void
3712 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3713 {
3714 RF_AutoConfig_t *ac;
3715 RF_AutoConfig_t *next_ac;
3716
3717 ac = cset->ac;
3718 while(ac!=NULL) {
3719 next_ac = ac->next;
3720 /* nuke the label */
3721 free(ac->clabel, M_RAIDFRAME);
3722 /* cleanup the config structure */
3723 free(ac, M_RAIDFRAME);
3724 /* "next.." */
3725 ac = next_ac;
3726 }
3727 /* and, finally, nuke the config set */
3728 free(cset, M_RAIDFRAME);
3729 }
3730
3731
/*
 * Initialize a component label from the current state of the RAID set.
 * Per-component fields (e.g. row, column, partitionSize) are not
 * filled in here; the caller is responsible for those.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* geometry: only single-row sets are produced */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3764
/*
 * Bring up a RAID set from an autoconfiguration set: pick a unit
 * number, build an RF_Config_t from the component labels, and
 * configure the set.  Returns the softc on success, NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the label; if that unit is
	   already valid (in use), walk upward to the first free one. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* configuration succeeded: attach the pseudo-disk and
		   mark all components dirty */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3838
3839 void
3840 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3841 {
3842 struct buf *bp;
3843 struct raid_softc *rs;
3844
3845 bp = (struct buf *)desc->bp;
3846 rs = desc->raidPtr->softc;
3847 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3848 (bp->b_flags & B_READ));
3849 }
3850
/*
 * Initialize a RAIDframe memory pool at IPL_BIO and pre-allocate
 * (prime) xmin items.
 *
 * p      pool to initialize
 * size   size in bytes of each pool item
 * w_chan name/wait channel for the pool
 * xmin   low-water mark; also the number of items primed up front
 * xmax   high-water mark
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3860
3861 /*
3862 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3863 * if there is IO pending and if that IO could possibly be done for a
3864 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3865 * otherwise.
3866 *
3867 */
3868
3869 int
3870 rf_buf_queue_check(RF_Raid_t *raidPtr)
3871 {
3872 struct raid_softc *rs = raidPtr->softc;
3873 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3874 /* there is work to do */
3875 return 0;
3876 }
3877 /* default is nothing to do */
3878 return 1;
3879 }
3880
3881 int
3882 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3883 {
3884 uint64_t numsecs;
3885 unsigned secsize;
3886 int error;
3887
3888 error = getdisksize(vp, &numsecs, &secsize);
3889 if (error == 0) {
3890 diskPtr->blockSize = secsize;
3891 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3892 diskPtr->partitionSize = numsecs;
3893 return 0;
3894 }
3895 return error;
3896 }
3897
/*
 * Autoconfiguration match routine: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3903
/*
 * Autoconfiguration attach routine: nothing to do at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3909
3910
3911 static int
3912 raid_detach(device_t self, int flags)
3913 {
3914 int error;
3915 struct raid_softc *rs = raidget(device_unit(self));
3916
3917 if (rs == NULL)
3918 return ENXIO;
3919
3920 if ((error = raidlock(rs)) != 0)
3921 return (error);
3922
3923 error = raid_detach_unlocked(rs);
3924
3925 raidunlock(rs);
3926
3927 /* XXXkd: raidput(rs) ??? */
3928
3929 return error;
3930 }
3931
/*
 * Fill in the geometry for the RAID pseudo-disk from the set's
 * parameters and push it to the disk layer.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* NOTE(review): a RAID set has no physical track geometry;
	   4 tracks per column looks like a synthetic value — confirm */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3946
3947 /*
3948 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3949 * We end up returning whatever error was returned by the first cache flush
3950 * that fails.
3951 */
3952
3953 int
3954 rf_sync_component_caches(RF_Raid_t *raidPtr)
3955 {
3956 int c, sparecol;
3957 int e,error;
3958 int force = 1;
3959
3960 error = 0;
3961 for (c = 0; c < raidPtr->numCol; c++) {
3962 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3963 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3964 &force, FWRITE, NOCRED);
3965 if (e) {
3966 if (e != ENODEV)
3967 printf("raid%d: cache flush to component %s failed.\n",
3968 raidPtr->raidid, raidPtr->Disks[c].devname);
3969 if (error == 0) {
3970 error = e;
3971 }
3972 }
3973 }
3974 }
3975
3976 for( c = 0; c < raidPtr->numSpare ; c++) {
3977 sparecol = raidPtr->numCol + c;
3978 /* Need to ensure that the reconstruct actually completed! */
3979 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3980 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3981 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3982 if (e) {
3983 if (e != ENODEV)
3984 printf("raid%d: cache flush to component %s failed.\n",
3985 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3986 if (error == 0) {
3987 error = e;
3988 }
3989 }
3990 }
3991 }
3992 return error;
3993 }
3994