rf_netbsdkintf.c revision 1.315 1 /* $NetBSD: rf_netbsdkintf.c,v 1.315 2014/11/04 07:51:55 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.315 2014/11/04 07:51:55 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
207 const struct bdevsw raid_bdevsw = {
208 .d_open = raidopen,
209 .d_close = raidclose,
210 .d_strategy = raidstrategy,
211 .d_ioctl = raidioctl,
212 .d_dump = raiddump,
213 .d_psize = raidsize,
214 .d_discard = nodiscard,
215 .d_flag = D_DISK
216 };
217
218 const struct cdevsw raid_cdevsw = {
219 .d_open = raidopen,
220 .d_close = raidclose,
221 .d_read = raidread,
222 .d_write = raidwrite,
223 .d_ioctl = raidioctl,
224 .d_stop = nostop,
225 .d_tty = notty,
226 .d_poll = nopoll,
227 .d_mmap = nommap,
228 .d_kqfilter = nokqfilter,
229 .d_discard = nodiscard,
230 .d_flag = D_DISK
231 };
232
233 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/*
 * Per-unit software state for a RAID device.  One exists for each
 * configured (or being configured) RAID set; all instances are linked
 * on the global `raids' list, which is protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raid unit number */
	int     sc_flags;		/* flags */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global `raids' list */
};
247 /* sc_flags */
248 #define RAIDF_INITED 0x01 /* unit has been initialized */
249 #define RAIDF_WLABEL 0x02 /* label area is writable */
250 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
251 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /*
263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
264 * Be aware that large numbers can allow the driver to consume a lot of
265 * kernel memory, especially on writes, and in degraded mode reads.
266 *
267 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
268 * a single 64K write will typically require 64K for the old data,
269 * 64K for the old parity, and 64K for the new parity, for a total
270 * of 192K (if the parity buffer is not re-used immediately).
271 * Even it if is used immediately, that's still 128K, which when multiplied
272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
273 *
274 * Now in degraded mode, for example, a 64K read on the above setup may
275 * require data reconstruction, which will require *all* of the 4 remaining
276 * disks to participate -- 4 * 32K/disk == 128K again.
277 */
278
279 #ifndef RAIDOUTSTANDING
280 #define RAIDOUTSTANDING 6
281 #endif
282
283 #define RAIDLABELDEV(dev) \
284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
285
286 /* declared here, and made public, for the benefit of KVM stuff.. */
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 if (sc == NULL) {
342 #ifdef DIAGNOSTIC
343 printf("%s: out of memory\n", __func__);
344 #endif
345 return NULL;
346 }
347 sc->sc_unit = unit;
348 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
349 return sc;
350 }
351
352 static void
353 raiddestroy(struct raid_softc *sc) {
354 bufq_free(sc->buf_queue);
355 kmem_free(sc, sizeof(*sc));
356 }
357
358 static struct raid_softc *
359 raidget(int unit) {
360 struct raid_softc *sc;
361 if (unit < 0) {
362 #ifdef DIAGNOSTIC
363 panic("%s: unit %d!", __func__, unit);
364 #endif
365 return NULL;
366 }
367 mutex_enter(&raid_lock);
368 LIST_FOREACH(sc, &raids, sc_link) {
369 if (sc->sc_unit == unit) {
370 mutex_exit(&raid_lock);
371 return sc;
372 }
373 }
374 mutex_exit(&raid_lock);
375 if ((sc = raidcreate(unit)) == NULL)
376 return NULL;
377 mutex_enter(&raid_lock);
378 LIST_INSERT_HEAD(&raids, sc, sc_link);
379 mutex_exit(&raid_lock);
380 return sc;
381 }
382
383 static void
384 raidput(struct raid_softc *sc) {
385 mutex_enter(&raid_lock);
386 LIST_REMOVE(sc, sc_link);
387 mutex_exit(&raid_lock);
388 raiddestroy(sc);
389 }
390
/*
 * Pseudo-device attach routine, called once at boot with the number
 * of units requested in the kernel config.  Initializes global locks,
 * boots the RAIDframe core, hooks the driver into autoconf, and
 * registers a finalizer so RAID sets are auto-configured after all
 * real hardware has attached.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table installation handshake state (declustering only). */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
423
424 int
425 rf_autoconfig(device_t self)
426 {
427 RF_AutoConfig_t *ac_list;
428 RF_ConfigSet_t *config_sets;
429
430 if (!raidautoconfig || raidautoconfigdone == true)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfigdone = true;
435
436 #ifdef __HAVE_CPU_BOOTCONF
437 /*
438 * 0. find the boot device if needed first so we can use it later
439 * this needs to be done before we autoconfigure any raid sets,
440 * because if we use wedges we are not going to be able to open
441 * the boot device later
442 */
443 if (booted_device == NULL)
444 cpu_bootconf();
445 #endif
446 /* 1. locate all RAID components on the system */
447 aprint_debug("Searching for RAID components...\n");
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set and configure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return 1;
460 }
461
462 static int
463 rf_containsboot(RF_Raid_t *r, device_t bdv) {
464 const char *bootname = device_xname(bdv);
465 size_t len = strlen(bootname);
466
467 for (int col = 0; col < r->numCol; col++) {
468 const char *devname = r->Disks[col].devname;
469 devname += sizeof("/dev/") - 1;
470 if (strncmp(devname, "dk", 2) == 0) {
471 const char *parent =
472 dkwedge_get_parent_name(r->Disks[col].dev);
473 if (parent != NULL)
474 devname = parent;
475 }
476 if (strncmp(devname, bootname, len) == 0) {
477 struct raid_softc *sc = r->softc;
478 aprint_debug("raid%d includes boot device %s\n",
479 sc->sc_unit, devname);
480 return 1;
481 }
482 }
483 return 0;
484 }
485
/*
 * Walk the list of discovered configuration sets, configure those
 * that are complete and marked for autoconfiguration, and then try
 * to determine the root device.  If exactly one configured set is
 * rootable it becomes booted_device; if several are candidates we
 * narrow the choice by which set contains the firmware boot device,
 * and failing that, ask the user (RB_ASKNAME).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; save the link now. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* Root set uses wedges: boot from the 'a' wedge. */
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Override the firmware's idea of root when nothing was
		 * found, when the set is forced-root (root_partition == 1),
		 * or when the set contains the device we booted from.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
596
597
598 int
599 raidsize(dev_t dev)
600 {
601 struct raid_softc *rs;
602 struct disklabel *lp;
603 int part, unit, omask, size;
604
605 unit = raidunit(dev);
606 if ((rs = raidget(unit)) == NULL)
607 return -1;
608 if ((rs->sc_flags & RAIDF_INITED) == 0)
609 return (-1);
610
611 part = DISKPART(dev);
612 omask = rs->sc_dkdev.dk_openmask & (1 << part);
613 lp = rs->sc_dkdev.dk_label;
614
615 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
616 return (-1);
617
618 if (lp->d_partitions[part].p_fstype != FS_SWAP)
619 size = -1;
620 else
621 size = lp->d_partitions[part].p_size *
622 (lp->d_secsize / DEV_BSIZE);
623
624 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
625 return (-1);
626
627 return (size);
628
629 }
630
/*
 * Crash-dump entry point.  Writes `size' bytes from `va' at block
 * `blkno' of the dump partition.  Only RAID 1 sets are supported:
 * we pick one live component (or a used spare standing in for one)
 * and pass the dump straight through to that component's driver.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dumps must be in whole DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse writes that would run past the end of the RAID device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
767 /* ARGSUSED */
/*
 * Open entry point (both block and character devices).  Validates the
 * partition, records it in the appropriate open mask, and on the very
 * first open of a configured set marks all components dirty so a crash
 * before a clean close is detectable.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
851 /* ARGSUSED */
852 int
853 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
854 {
855 int unit = raidunit(dev);
856 struct raid_softc *rs;
857 int error = 0;
858 int part;
859
860 if ((rs = raidget(unit)) == NULL)
861 return ENXIO;
862
863 if ((error = raidlock(rs)) != 0)
864 return (error);
865
866 part = DISKPART(dev);
867
868 /* ...that much closer to allowing unconfiguration... */
869 switch (fmt) {
870 case S_IFCHR:
871 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
872 break;
873
874 case S_IFBLK:
875 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
876 break;
877 }
878 rs->sc_dkdev.dk_openmask =
879 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
880
881 if ((rs->sc_dkdev.dk_openmask == 0) &&
882 ((rs->sc_flags & RAIDF_INITED) != 0)) {
883 /* Last one... device is not unconfigured yet.
884 Device shutdown has taken care of setting the
885 clean bits if RAIDF_INITED is not set
886 mark things as clean... */
887
888 rf_update_component_labels(&rs->sc_r,
889 RF_FINAL_COMPONENT_UPDATE);
890
891 /* If the kernel is shutting down, it will detach
892 * this RAID set soon enough.
893 */
894 }
895
896 raidunlock(rs);
897 return (0);
898
899 }
900
/*
 * Strategy entry point: validate and bounds-check `bp', then queue it
 * on the unit's buffer queue and wake the RAIDframe I/O thread, which
 * performs the actual transfer.  On error the buffer is completed
 * immediately with b_error set.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfers complete trivially with no error. */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors to DEV_BSIZE units, whichever
		   direction the sector size differs from DEV_BSIZE. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
971 /* ARGSUSED */
972 int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 int unit = raidunit(dev);
976 struct raid_softc *rs;
977
978 if ((rs = raidget(unit)) == NULL)
979 return ENXIO;
980
981 if ((rs->sc_flags & RAIDF_INITED) == 0)
982 return (ENXIO);
983
984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985
986 }
987 /* ARGSUSED */
988 int
989 raidwrite(dev_t dev, struct uio *uio, int flags)
990 {
991 int unit = raidunit(dev);
992 struct raid_softc *rs;
993
994 if ((rs = raidget(unit)) == NULL)
995 return ENXIO;
996
997 if ((rs->sc_flags & RAIDF_INITED) == 0)
998 return (ENXIO);
999
1000 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1001
1002 }
1003
/*
 * Tear down a RAID unit; the caller must already hold the unit lock.
 * Fails with EBUSY if any partition is still open.  Shuts down the
 * RAIDframe core (if the unit was configured) and detaches the
 * generic disk structure.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1035
/*
 * raidioctl: ioctl entry point for a RAIDframe unit.
 *
 * dev  - raid device; unit and partition are encoded in the minor.
 * cmd  - ioctl command.
 * data - in/out argument buffer (already copied in by the upper layer;
 *        for several RAIDFRAME_* commands it holds a *user* pointer
 *        that is then copyin()/copyout()'d explicitly below).
 * flag - open flags of the descriptor (FREAD/FWRITE).
 * l    - calling lwp.
 *
 * Returns 0 on success or an errno.  RAIDFRAME_* and some DIOC*
 * commands are consumed by the first big switch; anything left over
 * falls through to disk_ioctl() and the generic disklabel / wedge /
 * buffer-queue-strategy handling at the bottom.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* rf_config50() converts the old config and leaves a
		 * freshly-malloc'd RF_Config_t in k_cfg; join the common
		 * configure path below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			/* 10000 is an arbitrary sanity cap on the size of
			 * the user-supplied layout-specific blob. */
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers. No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse to shut down while any partition other than the
		 * one this ioctl came in on is still open. */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* config_detach() tears down the device; on success we
		 * also free the cfdata that raidinit() malloc'd. */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		/* Only the column field of the copied-in label is used;
		 * it selects which component's in-core label to return. */
		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		/* raidget_component_label() returns a pointer into the
		 * in-core label cache; it is not freed here. */
		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): unimplemented -- returns retcode (still 0)
		 * without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check component state under the raid mutex before
		 * launching the reconstruction thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* The thread owns rrcopy and is responsible for freeing it. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] immediately after the data
		 * columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		/* NOTE(review): totalSectors is narrowed to int here;
		 * large arrays may not be representable. */
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded;
		 * a thread-creation failure is silently reported as
		 * success (and rrcopy would leak) -- confirm intent. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (cf. RAIDFRAME_SEND_SPARET below);
		 * harmless today because this block is under #if 0, but it
		 * would not compile if re-enabled. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		/* Serialize label updates with the unit lock; the
		 * RAIDF_LABELLING flag marks the window for raidstrategy. */
		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCMWEDGES:
		dkwedge_discover(&rs->sc_dkdev);
		return 0;
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* splbio protects buf_queue against raidstrategy. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue atomically w.r.t. raidstrategy,
		 * migrating any pending bufs, then free the old one. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1972
1973
1974 /* raidinit -- complete the rest of the initialization for the
1975 RAIDframe device. */
1976
1977
1978 static void
1979 raidinit(struct raid_softc *rs)
1980 {
1981 cfdata_t cf;
1982 int unit;
1983 RF_Raid_t *raidPtr = &rs->sc_r;
1984
1985 unit = raidPtr->raidid;
1986
1987
1988 /* XXX should check return code first... */
1989 rs->sc_flags |= RAIDF_INITED;
1990
1991 /* XXX doesn't check bounds. */
1992 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1993
1994 /* attach the pseudo device */
1995 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1996 cf->cf_name = raid_cd.cd_name;
1997 cf->cf_atname = raid_cd.cd_name;
1998 cf->cf_unit = unit;
1999 cf->cf_fstate = FSTATE_STAR;
2000
2001 rs->sc_dev = config_attach_pseudo(cf);
2002
2003 if (rs->sc_dev == NULL) {
2004 printf("raid%d: config_attach_pseudo failed\n",
2005 raidPtr->raidid);
2006 rs->sc_flags &= ~RAIDF_INITED;
2007 free(cf, M_RAIDFRAME);
2008 return;
2009 }
2010
2011 /* disk_attach actually creates space for the CPU disklabel, among
2012 * other things, so it's critical to call this *BEFORE* we try putzing
2013 * with disklabels. */
2014
2015 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2016 disk_attach(&rs->sc_dkdev);
2017 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
2018
2019 /* XXX There may be a weird interaction here between this, and
2020 * protectedSectors, as used in RAIDframe. */
2021
2022 rs->sc_size = raidPtr->totalSectors;
2023
2024 dkwedge_discover(&rs->sc_dkdev);
2025
2026 rf_set_geometry(rs, raidPtr);
2027
2028 }
2029 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2030 /* wake up the daemon & tell it to get us a spare table
2031 * XXX
2032 * the entries in the queues should be tagged with the raidPtr
2033 * so that in the extremely rare case that two recons happen at once,
2034 * we know for which device were requesting a spare table
2035 * XXX
2036 *
2037 * XXX This code is not currently used. GO
2038 */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * (userland) sparetable daemon and block until the response arrives.
 *
 * req is placed on rf_sparet_wait_queue (ownership passes to whoever
 * dequeues it); the daemon's answer is taken off rf_sparet_resp_queue.
 * Returns the "fcol" status code carried in the response.  Both queues
 * are protected by rf_sparet_wait_mutex.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Enqueue the request and wake the waiting daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): comment above is stale -- rf_wait_cond2() is the
	 * condvar wait now; it atomically releases and reacquires the
	 * mutex around the sleep. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Pop the response; note this reuses 'req' for a *different*
	 * object than the caller passed in. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2062 #endif
2063
2064 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2065 * bp & passes it down.
2066 * any calls originating in the kernel must use non-blocking I/O
2067 * do some extra sanity checking to return "appropriate" error values for
2068 * certain conditions (to make some standard utilities work)
2069 *
2070 * Formerly known as: rf_DoAccessKernel
2071 */
/*
 * raidstart: drain the unit's buffer queue into RAIDframe.
 *
 * Pops bufs off rs->buf_queue while raidPtr->openings > 0, converts
 * each buf's partition-relative block number to a raid address, and
 * hands it to rf_DoAccess() as a non-blocking async access.  Bufs that
 * fail validation (past end of set, not sector-aligned) are completed
 * immediately with an error via biodone().
 *
 * Locking: raidPtr->mutex is held at the top of each loop iteration
 * and on exit from the loop; it is dropped around bufq_get() and the
 * call into rf_DoAccess().
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Component labels are updated without the mutex held. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to raid sectors. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* "1 ||" forces the (compiled-out unless debugging)
		 * db1_printf path; rf_debugKernelAccess is never consulted. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject accesses past the end of the set; the "sum <"
		 * comparisons also catch wraparound of the unsigned sum. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one of the outstanding-I/O slots. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* NOTE(review): openings was decremented above and is
			 * not restored here; presumably the completion path
			 * inside/after rf_DoAccess accounts for it -- confirm. */
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2189
2190
2191
2192
2193 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2194
2195 int
2196 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2197 {
2198 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2199 struct buf *bp;
2200
2201 req->queue = queue;
2202 bp = req->bp;
2203
2204 switch (req->type) {
2205 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2206 /* XXX need to do something extra here.. */
2207 /* I'm leaving this in, as I've never actually seen it used,
2208 * and I'd like folks to report it... GO */
2209 printf(("WAKEUP CALLED\n"));
2210 queue->numOutstanding++;
2211
2212 bp->b_flags = 0;
2213 bp->b_private = req;
2214
2215 KernelWakeupFunc(bp);
2216 break;
2217
2218 case RF_IO_TYPE_READ:
2219 case RF_IO_TYPE_WRITE:
2220 #if RF_ACC_TRACE > 0
2221 if (req->tracerec) {
2222 RF_ETIMER_START(req->tracerec->timer);
2223 }
2224 #endif
2225 InitBP(bp, queue->rf_cinfo->ci_vp,
2226 op, queue->rf_cinfo->ci_dev,
2227 req->sectorOffset, req->numSector,
2228 req->buf, KernelWakeupFunc, (void *) req,
2229 queue->raidPtr->logBytesPerSector, req->b_proc);
2230
2231 if (rf_debugKernelAccess) {
2232 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2233 (long) bp->b_blkno));
2234 }
2235 queue->numOutstanding++;
2236 queue->last_deq_sector = req->sectorOffset;
2237 /* acc wouldn't have been let in if there were any pending
2238 * reqs at any other priority */
2239 queue->curPriority = req->priority;
2240
2241 db1_printf(("Going for %c to unit %d col %d\n",
2242 req->type, queue->raidPtr->raidid,
2243 queue->col));
2244 db1_printf(("sector %d count %d (%d bytes) %d\n",
2245 (int) req->sectorOffset, (int) req->numSector,
2246 (int) (req->numSector <<
2247 queue->raidPtr->logBytesPerSector),
2248 (int) queue->raidPtr->logBytesPerSector));
2249
2250 /*
2251 * XXX: drop lock here since this can block at
2252 * least with backing SCSI devices. Retake it
2253 * to minimize fuss with calling interfaces.
2254 */
2255
2256 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2257 bdev_strategy(bp);
2258 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2259 break;
2260
2261 default:
2262 panic("bad req->type in rf_DispatchKernelIO");
2263 }
2264 db1_printf(("Exiting from DispatchKernelIO\n"));
2265
2266 return (0);
2267 }
/*
 * KernelWakeupFunc: biodone callback for a component I/O issued from
 * kernel code (installed as bp->b_iodone by InitBP, or called directly
 * for RF_IO_TYPE_NOP requests by rf_DispatchKernelIO).
 *
 * Recovers the RF_DiskQueueData_t from bp->b_private, accounts trace
 * timing, marks the component failed on I/O error (only the first
 * time, and only if the set can still tolerate the failure), then
 * places the request on the raidPtr->iodone queue and signals the
 * raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private when the buf was set up */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* charge the elapsed physical I/O time to this access's trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2336
2337
2338 /*
2339 * initialize a buf structure for doing an I/O in the kernel.
2340 */
2341 static void
2342 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2343 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2344 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2345 struct proc *b_proc)
2346 {
2347 /* bp->b_flags = B_PHYS | rw_flag; */
2348 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2349 bp->b_oflags = 0;
2350 bp->b_cflags = 0;
2351 bp->b_bcount = numSect << logBytesPerSector;
2352 bp->b_bufsize = bp->b_bcount;
2353 bp->b_error = 0;
2354 bp->b_dev = dev;
2355 bp->b_data = bf;
2356 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2357 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2358 if (bp->b_bcount == 0) {
2359 panic("bp->b_bcount is zero in InitBP!!");
2360 }
2361 bp->b_proc = b_proc;
2362 bp->b_iodone = cbFunc;
2363 bp->b_private = cbArg;
2364 }
2365
2366 static void
2367 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2368 struct disklabel *lp)
2369 {
2370 memset(lp, 0, sizeof(*lp));
2371
2372 /* fabricate a label... */
2373 if (raidPtr->totalSectors > UINT32_MAX)
2374 lp->d_secperunit = UINT32_MAX;
2375 else
2376 lp->d_secperunit = raidPtr->totalSectors;
2377 lp->d_secsize = raidPtr->bytesPerSector;
2378 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2379 lp->d_ntracks = 4 * raidPtr->numCol;
2380 lp->d_ncylinders = raidPtr->totalSectors /
2381 (lp->d_nsectors * lp->d_ntracks);
2382 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2383
2384 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2385 lp->d_type = DTYPE_RAID;
2386 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2387 lp->d_rpm = 3600;
2388 lp->d_interleave = 1;
2389 lp->d_flags = 0;
2390
2391 lp->d_partitions[RAW_PART].p_offset = 0;
2392 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2393 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2394 lp->d_npartitions = RAW_PART + 1;
2395
2396 lp->d_magic = DISKMAGIC;
2397 lp->d_magic2 = DISKMAGIC;
2398 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2399
2400 }
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 *
 * Always starts from a fabricated default label so the softc's label
 * is sane even when the on-disk one is missing; when a real label is
 * found, it is sanity-checked against the current size of the set.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* seed lp with a fabricated default before probing the media */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable label on disk: finalize the default one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit < UINT32_MAX ?
		    lp->d_secperunit != rs->sc_size :
		    lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		/* warn about (but keep) partitions extending past the set */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ju)\n",
				    unit, rs->sc_xname, 'a' + i,
				    (uintmax_t)rs->sc_size);
		}
	}

}
2467 /*
2468 * Take care of things one might want to take care of in the event
2469 * that a disklabel isn't present.
2470 */
2471 static void
2472 raidmakedisklabel(struct raid_softc *rs)
2473 {
2474 struct disklabel *lp = rs->sc_dkdev.dk_label;
2475 db1_printf(("Making a label..\n"));
2476
2477 /*
2478 * For historical reasons, if there's no disklabel present
2479 * the raw partition must be marked FS_BSDFFS.
2480 */
2481
2482 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2483
2484 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2485
2486 lp->d_checksum = dkcksum(lp);
2487 }
2488 /*
2489 * Wait interruptibly for an exclusive lock.
2490 *
2491 * XXX
2492 * Several drivers do this; it should be abstracted and made MP-safe.
2493 * (Hmm... where have we seen this warning before :-> GO )
2494 */
2495 static int
2496 raidlock(struct raid_softc *rs)
2497 {
2498 int error;
2499
2500 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2501 rs->sc_flags |= RAIDF_WANTED;
2502 if ((error =
2503 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2504 return (error);
2505 }
2506 rs->sc_flags |= RAIDF_LOCKED;
2507 return (0);
2508 }
2509 /*
2510 * Unlock and wake up any waiters.
2511 */
2512 static void
2513 raidunlock(struct raid_softc *rs)
2514 {
2515
2516 rs->sc_flags &= ~RAIDF_LOCKED;
2517 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2518 rs->sc_flags &= ~RAIDF_WANTED;
2519 wakeup(rs);
2520 }
2521 }
2522
2523
2524 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2525 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2526 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2527
2528 static daddr_t
2529 rf_component_info_offset(void)
2530 {
2531
2532 return RF_COMPONENT_INFO_OFFSET;
2533 }
2534
2535 static daddr_t
2536 rf_component_info_size(unsigned secsize)
2537 {
2538 daddr_t info_size;
2539
2540 KASSERT(secsize);
2541 if (secsize > RF_COMPONENT_INFO_SIZE)
2542 info_size = secsize;
2543 else
2544 info_size = RF_COMPONENT_INFO_SIZE;
2545
2546 return info_size;
2547 }
2548
2549 static daddr_t
2550 rf_parity_map_offset(RF_Raid_t *raidPtr)
2551 {
2552 daddr_t map_offset;
2553
2554 KASSERT(raidPtr->bytesPerSector);
2555 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2556 map_offset = raidPtr->bytesPerSector;
2557 else
2558 map_offset = RF_COMPONENT_INFO_SIZE;
2559 map_offset += rf_component_info_offset();
2560
2561 return map_offset;
2562 }
2563
2564 static daddr_t
2565 rf_parity_map_size(RF_Raid_t *raidPtr)
2566 {
2567 daddr_t map_size;
2568
2569 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2570 map_size = raidPtr->bytesPerSector;
2571 else
2572 map_size = RF_PARITY_MAP_SIZE;
2573
2574 return map_size;
2575 }
2576
2577 int
2578 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2579 {
2580 RF_ComponentLabel_t *clabel;
2581
2582 clabel = raidget_component_label(raidPtr, col);
2583 clabel->clean = RF_RAID_CLEAN;
2584 raidflush_component_label(raidPtr, col);
2585 return(0);
2586 }
2587
2588
2589 int
2590 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2591 {
2592 RF_ComponentLabel_t *clabel;
2593
2594 clabel = raidget_component_label(raidPtr, col);
2595 clabel->clean = RF_RAID_DIRTY;
2596 raidflush_component_label(raidPtr, col);
2597 return(0);
2598 }
2599
2600 int
2601 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2602 {
2603 KASSERT(raidPtr->bytesPerSector);
2604 return raidread_component_label(raidPtr->bytesPerSector,
2605 raidPtr->Disks[col].dev,
2606 raidPtr->raid_cinfo[col].ci_vp,
2607 &raidPtr->raid_cinfo[col].ci_label);
2608 }
2609
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers may modify it and then persist it with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2615
2616 int
2617 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2618 {
2619 RF_ComponentLabel_t *label;
2620
2621 label = &raidPtr->raid_cinfo[col].ci_label;
2622 label->mod_counter = raidPtr->mod_counter;
2623 #ifndef RF_NO_PARITY_MAP
2624 label->parity_map_modcount = label->mod_counter;
2625 #endif
2626 return raidwrite_component_label(raidPtr->bytesPerSector,
2627 raidPtr->Disks[col].dev,
2628 raidPtr->raid_cinfo[col].ci_vp, label);
2629 }
2630
2631
2632 static int
2633 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2634 RF_ComponentLabel_t *clabel)
2635 {
2636 return raidread_component_area(dev, b_vp, clabel,
2637 sizeof(RF_ComponentLabel_t),
2638 rf_component_info_offset(),
2639 rf_component_info_size(secsize));
2640 }
2641
2642 /* ARGSUSED */
2643 static int
2644 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2645 size_t msize, daddr_t offset, daddr_t dsize)
2646 {
2647 struct buf *bp;
2648 const struct bdevsw *bdev;
2649 int error;
2650
2651 /* XXX should probably ensure that we don't try to do this if
2652 someone has changed rf_protected_sectors. */
2653
2654 if (b_vp == NULL) {
2655 /* For whatever reason, this component is not valid.
2656 Don't try to read a component label from it. */
2657 return(EINVAL);
2658 }
2659
2660 /* get a block of the appropriate size... */
2661 bp = geteblk((int)dsize);
2662 bp->b_dev = dev;
2663
2664 /* get our ducks in a row for the read */
2665 bp->b_blkno = offset / DEV_BSIZE;
2666 bp->b_bcount = dsize;
2667 bp->b_flags |= B_READ;
2668 bp->b_resid = dsize;
2669
2670 bdev = bdevsw_lookup(bp->b_dev);
2671 if (bdev == NULL)
2672 return (ENXIO);
2673 (*bdev->d_strategy)(bp);
2674
2675 error = biowait(bp);
2676
2677 if (!error) {
2678 memcpy(data, bp->b_data, msize);
2679 }
2680
2681 brelse(bp, 0);
2682 return(error);
2683 }
2684
2685
2686 static int
2687 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2688 RF_ComponentLabel_t *clabel)
2689 {
2690 return raidwrite_component_area(dev, b_vp, clabel,
2691 sizeof(RF_ComponentLabel_t),
2692 rf_component_info_offset(),
2693 rf_component_info_size(secsize), 0);
2694 }
2695
2696 /* ARGSUSED */
2697 static int
2698 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2699 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2700 {
2701 struct buf *bp;
2702 const struct bdevsw *bdev;
2703 int error;
2704
2705 /* get a block of the appropriate size... */
2706 bp = geteblk((int)dsize);
2707 bp->b_dev = dev;
2708
2709 /* get our ducks in a row for the write */
2710 bp->b_blkno = offset / DEV_BSIZE;
2711 bp->b_bcount = dsize;
2712 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2713 bp->b_resid = dsize;
2714
2715 memset(bp->b_data, 0, dsize);
2716 memcpy(bp->b_data, data, msize);
2717
2718 bdev = bdevsw_lookup(bp->b_dev);
2719 if (bdev == NULL)
2720 return (ENXIO);
2721 (*bdev->d_strategy)(bp);
2722 if (asyncp)
2723 return 0;
2724 error = biowait(bp);
2725 brelse(bp, 0);
2726 if (error) {
2727 #if 1
2728 printf("Failed to write RAID component info!\n");
2729 #endif
2730 }
2731
2732 return(error);
2733 }
2734
2735 void
2736 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2737 {
2738 int c;
2739
2740 for (c = 0; c < raidPtr->numCol; c++) {
2741 /* Skip dead disks. */
2742 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2743 continue;
2744 /* XXXjld: what if an error occurs here? */
2745 raidwrite_component_area(raidPtr->Disks[c].dev,
2746 raidPtr->raid_cinfo[c].ci_vp, map,
2747 RF_PARITYMAP_NBYTE,
2748 rf_parity_map_offset(raidPtr),
2749 rf_parity_map_size(raidPtr), 0);
2750 }
2751 }
2752
2753 void
2754 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2755 {
2756 struct rf_paritymap_ondisk tmp;
2757 int c,first;
2758
2759 first=1;
2760 for (c = 0; c < raidPtr->numCol; c++) {
2761 /* Skip dead disks. */
2762 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2763 continue;
2764 raidread_component_area(raidPtr->Disks[c].dev,
2765 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2766 RF_PARITYMAP_NBYTE,
2767 rf_parity_map_offset(raidPtr),
2768 rf_parity_map_size(raidPtr));
2769 if (first) {
2770 memcpy(map, &tmp, sizeof(*map));
2771 first = 0;
2772 } else {
2773 rf_paritymap_merge(map, &tmp);
2774 }
2775 }
2776 }
2777
/*
 * Bump the modification counter and mark the component label of every
 * live component (and every in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol retains its previous value (initially -1) —
			   confirm that cannot happen for a used spare. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2837
2838
/*
 * Push fresh component labels (with a bumped mod counter) to every
 * optimal component and every in-use spare.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): scol stays -1 when no column maps to
			   this spare — confirm that's unreachable here. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2913
2914 void
2915 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2916 {
2917
2918 if (vp != NULL) {
2919 if (auto_configured == 1) {
2920 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2921 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2922 vput(vp);
2923
2924 } else {
2925 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2926 }
2927 }
2928 }
2929
2930
2931 void
2932 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2933 {
2934 int r,c;
2935 struct vnode *vp;
2936 int acd;
2937
2938
2939 /* We take this opportunity to close the vnodes like we should.. */
2940
2941 for (c = 0; c < raidPtr->numCol; c++) {
2942 vp = raidPtr->raid_cinfo[c].ci_vp;
2943 acd = raidPtr->Disks[c].auto_configured;
2944 rf_close_component(raidPtr, vp, acd);
2945 raidPtr->raid_cinfo[c].ci_vp = NULL;
2946 raidPtr->Disks[c].auto_configured = 0;
2947 }
2948
2949 for (r = 0; r < raidPtr->numSpare; r++) {
2950 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2951 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2952 rf_close_component(raidPtr, vp, acd);
2953 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2954 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2955 }
2956 }
2957
2958
2959 void
2960 rf_ReconThread(struct rf_recon_req *req)
2961 {
2962 int s;
2963 RF_Raid_t *raidPtr;
2964
2965 s = splbio();
2966 raidPtr = (RF_Raid_t *) req->raidPtr;
2967 raidPtr->recon_in_progress = 1;
2968
2969 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2970 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2971
2972 RF_Free(req, sizeof(*req));
2973
2974 raidPtr->recon_in_progress = 0;
2975 splx(s);
2976
2977 /* That's all... */
2978 kthread_exit(0); /* does not return */
2979 }
2980
2981 void
2982 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2983 {
2984 int retcode;
2985 int s;
2986
2987 raidPtr->parity_rewrite_stripes_done = 0;
2988 raidPtr->parity_rewrite_in_progress = 1;
2989 s = splbio();
2990 retcode = rf_RewriteParity(raidPtr);
2991 splx(s);
2992 if (retcode) {
2993 printf("raid%d: Error re-writing parity (%d)!\n",
2994 raidPtr->raidid, retcode);
2995 } else {
2996 /* set the clean bit! If we shutdown correctly,
2997 the clean bit on each component label will get
2998 set */
2999 raidPtr->parity_good = RF_RAID_CLEAN;
3000 }
3001 raidPtr->parity_rewrite_in_progress = 0;
3002
3003 /* Anyone waiting for us to stop? If so, inform them... */
3004 if (raidPtr->waitShutdown) {
3005 wakeup(&raidPtr->parity_rewrite_in_progress);
3006 }
3007
3008 /* That's all... */
3009 kthread_exit(0); /* does not return */
3010 }
3011
3012
3013 void
3014 rf_CopybackThread(RF_Raid_t *raidPtr)
3015 {
3016 int s;
3017
3018 raidPtr->copyback_in_progress = 1;
3019 s = splbio();
3020 rf_CopybackReconstructedData(raidPtr);
3021 splx(s);
3022 raidPtr->copyback_in_progress = 0;
3023
3024 /* That's all... */
3025 kthread_exit(0); /* does not return */
3026 }
3027
3028
3029 void
3030 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
3031 {
3032 int s;
3033 RF_Raid_t *raidPtr;
3034
3035 s = splbio();
3036 raidPtr = req->raidPtr;
3037 raidPtr->recon_in_progress = 1;
3038 rf_ReconstructInPlace(raidPtr, req->col);
3039 RF_Free(req, sizeof(*req));
3040 raidPtr->recon_in_progress = 0;
3041 splx(s);
3042
3043 /* That's all... */
3044 kthread_exit(0); /* does not return */
3045 }
3046
/*
 * Examine one candidate device/partition for a RAIDframe component
 * label.  If a plausible label is found, a new RF_AutoConfig_t is
 * prepended to `ac_list' and the open vnode's ownership passes to the
 * list entry; otherwise the vnode is closed and released here.
 *
 * Returns the (possibly extended) list, or NULL on memory exhaustion,
 * in which case the whole list has been freed.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label unreadable or rejected; drop our refs */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3104
/*
 * Scan every disk-class device in the system (skipping floppies, CDs
 * and memory disks) for RAIDframe component labels: in FS_RAID
 * disklabel partitions, in RAIDframe-typed wedges, and — when neither
 * yields anything — on the raw partition itself.  Returns a linked
 * list of RF_AutoConfig_t entries for every plausible component found
 * (NULL if none, or on memory exhaustion inside rf_get_component).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their type in dkw_ptype, not a label */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* vnode ownership passes to rf_get_component */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3286
3287
3288 int
3289 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3290 {
3291
3292 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3293 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3294 ((clabel->clean == RF_RAID_CLEAN) ||
3295 (clabel->clean == RF_RAID_DIRTY)) &&
3296 clabel->row >=0 &&
3297 clabel->column >= 0 &&
3298 clabel->num_rows > 0 &&
3299 clabel->num_columns > 0 &&
3300 clabel->row < clabel->num_rows &&
3301 clabel->column < clabel->num_columns &&
3302 clabel->blockSize > 0 &&
3303 /*
3304 * numBlocksHi may contain garbage, but it is ok since
3305 * the type is unsigned. If it is really garbage,
3306 * rf_fix_old_label_size() will fix it.
3307 */
3308 rf_component_label_numblocks(clabel) > 0) {
3309 /*
3310 * label looks reasonable enough...
3311 * let's make sure it has no old garbage.
3312 */
3313 if (numsecs)
3314 rf_fix_old_label_size(clabel, numsecs);
3315 return(1);
3316 }
3317 return(0);
3318 }
3319
3320
3321 /*
3322 * For reasons yet unknown, some old component labels have garbage in
3323 * the newer numBlocksHi region, and this causes lossage. Since those
3324 * disks will also have numsecs set to less than 32 bits of sectors,
3325 * we can determine when this corruption has occurred, and fix it.
3326 *
3327 * The exact same problem, with the same unknown reason, happens to
3328 * the partitionSizeHi member as well.
3329 */
3330 static void
3331 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3332 {
3333
3334 if (numsecs < ((uint64_t)1 << 32)) {
3335 if (clabel->numBlocksHi) {
3336 printf("WARNING: total sectors < 32 bits, yet "
3337 "numBlocksHi set\n"
3338 "WARNING: resetting numBlocksHi to zero.\n");
3339 clabel->numBlocksHi = 0;
3340 }
3341
3342 if (clabel->partitionSizeHi) {
3343 printf("WARNING: total sectors < 32 bits, yet "
3344 "partitionSizeHi set\n"
3345 "WARNING: resetting partitionSizeHi to zero.\n");
3346 clabel->partitionSizeHi = 0;
3347 }
3348 }
3349 }
3350
3351
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Indexed by root_partition: 0 = no, 1 = forced, 2 = soft. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3385
3386 RF_ConfigSet_t *
3387 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3388 {
3389 RF_AutoConfig_t *ac;
3390 RF_ConfigSet_t *config_sets;
3391 RF_ConfigSet_t *cset;
3392 RF_AutoConfig_t *ac_next;
3393
3394
3395 config_sets = NULL;
3396
3397 /* Go through the AutoConfig list, and figure out which components
3398 belong to what sets. */
3399 ac = ac_list;
3400 while(ac!=NULL) {
3401 /* we're going to putz with ac->next, so save it here
3402 for use at the end of the loop */
3403 ac_next = ac->next;
3404
3405 if (config_sets == NULL) {
3406 /* will need at least this one... */
3407 config_sets = (RF_ConfigSet_t *)
3408 malloc(sizeof(RF_ConfigSet_t),
3409 M_RAIDFRAME, M_NOWAIT);
3410 if (config_sets == NULL) {
3411 panic("rf_create_auto_sets: No memory!");
3412 }
3413 /* this one is easy :) */
3414 config_sets->ac = ac;
3415 config_sets->next = NULL;
3416 config_sets->rootable = 0;
3417 ac->next = NULL;
3418 } else {
3419 /* which set does this component fit into? */
3420 cset = config_sets;
3421 while(cset!=NULL) {
3422 if (rf_does_it_fit(cset, ac)) {
3423 /* looks like it matches... */
3424 ac->next = cset->ac;
3425 cset->ac = ac;
3426 break;
3427 }
3428 cset = cset->next;
3429 }
3430 if (cset==NULL) {
3431 /* didn't find a match above... new set..*/
3432 cset = (RF_ConfigSet_t *)
3433 malloc(sizeof(RF_ConfigSet_t),
3434 M_RAIDFRAME, M_NOWAIT);
3435 if (cset == NULL) {
3436 panic("rf_create_auto_sets: No memory!");
3437 }
3438 cset->ac = ac;
3439 ac->next = NULL;
3440 cset->next = config_sets;
3441 cset->rootable = 0;
3442 config_sets = cset;
3443 }
3444 }
3445 ac = ac_next;
3446 }
3447
3448
3449 return(config_sets);
3450 }
3451
3452 static int
3453 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3454 {
3455 RF_ComponentLabel_t *clabel1, *clabel2;
3456
3457 /* If this one matches the *first* one in the set, that's good
3458 enough, since the other members of the set would have been
3459 through here too... */
3460 /* note that we are not checking partitionSize here..
3461
3462 Note that we are also not checking the mod_counters here.
3463 If everything else matches except the mod_counter, that's
3464 good enough for this test. We will deal with the mod_counters
3465 a little later in the autoconfiguration process.
3466
3467 (clabel1->mod_counter == clabel2->mod_counter) &&
3468
3469 The reason we don't check for this is that failed disks
3470 will have lower modification counts. If those disks are
3471 not added to the set they used to belong to, then they will
3472 form their own set, which may result in 2 different sets,
3473 for example, competing to be configured at raid0, and
3474 perhaps competing to be the root filesystem set. If the
3475 wrong ones get configured, or both attempt to become /,
3476 weird behaviour and or serious lossage will occur. Thus we
3477 need to bring them into the fold here, and kick them out at
3478 a later point.
3479
3480 */
3481
3482 clabel1 = cset->ac->clabel;
3483 clabel2 = ac->clabel;
3484 if ((clabel1->version == clabel2->version) &&
3485 (clabel1->serial_number == clabel2->serial_number) &&
3486 (clabel1->num_rows == clabel2->num_rows) &&
3487 (clabel1->num_columns == clabel2->num_columns) &&
3488 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3489 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3490 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3491 (clabel1->parityConfig == clabel2->parityConfig) &&
3492 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3493 (clabel1->blockSize == clabel2->blockSize) &&
3494 rf_component_label_numblocks(clabel1) ==
3495 rf_component_label_numblocks(clabel2) &&
3496 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3497 (clabel1->root_partition == clabel2->root_partition) &&
3498 (clabel1->last_unit == clabel2->last_unit) &&
3499 (clabel1->config_order == clabel2->config_order)) {
3500 /* if it get's here, it almost *has* to be a match */
3501 } else {
3502 /* it's not consistent with somebody in the set..
3503 punt */
3504 return(0);
3505 }
3506 /* all was fine.. it must fit... */
3507 return(1);
3508 }
3509
/*
 * Decide whether a config set has enough live components (at the
 * newest mod_counter) to be configured.  Returns 1 if viable, 0 if
 * too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest counter wins: stale (failed) components carry a
	   lower count and are treated as missing below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each expected column, look for a live component with the
	   winning mod_counter */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) component of a
			   mirror pair, and we didn't bail.. reset the
			   even_pair_failed flag, and go on to the next
			   component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Tolerated failures: RAID 0 none, RAID 4/5 at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3612
3613 void
3614 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3615 RF_Raid_t *raidPtr)
3616 {
3617 RF_ComponentLabel_t *clabel;
3618 int i;
3619
3620 clabel = ac->clabel;
3621
3622 /* 1. Fill in the common stuff */
3623 config->numRow = clabel->num_rows = 1;
3624 config->numCol = clabel->num_columns;
3625 config->numSpare = 0; /* XXX should this be set here? */
3626 config->sectPerSU = clabel->sectPerSU;
3627 config->SUsPerPU = clabel->SUsPerPU;
3628 config->SUsPerRU = clabel->SUsPerRU;
3629 config->parityConfig = clabel->parityConfig;
3630 /* XXX... */
3631 strcpy(config->diskQueueType,"fifo");
3632 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3633 config->layoutSpecificSize = 0; /* XXX ?? */
3634
3635 while(ac!=NULL) {
3636 /* row/col values will be in range due to the checks
3637 in reasonable_label() */
3638 strcpy(config->devnames[0][ac->clabel->column],
3639 ac->devname);
3640 ac = ac->next;
3641 }
3642
3643 for(i=0;i<RF_MAXDBGV;i++) {
3644 config->debugVars[i][0] = 0;
3645 }
3646 }
3647
3648 int
3649 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3650 {
3651 RF_ComponentLabel_t *clabel;
3652 int column;
3653 int sparecol;
3654
3655 raidPtr->autoconfigure = new_value;
3656
3657 for(column=0; column<raidPtr->numCol; column++) {
3658 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3659 clabel = raidget_component_label(raidPtr, column);
3660 clabel->autoconfigure = new_value;
3661 raidflush_component_label(raidPtr, column);
3662 }
3663 }
3664 for(column = 0; column < raidPtr->numSpare ; column++) {
3665 sparecol = raidPtr->numCol + column;
3666 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3667 clabel = raidget_component_label(raidPtr, sparecol);
3668 clabel->autoconfigure = new_value;
3669 raidflush_component_label(raidPtr, sparecol);
3670 }
3671 }
3672 return(new_value);
3673 }
3674
3675 int
3676 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3677 {
3678 RF_ComponentLabel_t *clabel;
3679 int column;
3680 int sparecol;
3681
3682 raidPtr->root_partition = new_value;
3683 for(column=0; column<raidPtr->numCol; column++) {
3684 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3685 clabel = raidget_component_label(raidPtr, column);
3686 clabel->root_partition = new_value;
3687 raidflush_component_label(raidPtr, column);
3688 }
3689 }
3690 for(column = 0; column < raidPtr->numSpare ; column++) {
3691 sparecol = raidPtr->numCol + column;
3692 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3693 clabel = raidget_component_label(raidPtr, sparecol);
3694 clabel->root_partition = new_value;
3695 raidflush_component_label(raidPtr, sparecol);
3696 }
3697 }
3698 return(new_value);
3699 }
3700
3701 void
3702 rf_release_all_vps(RF_ConfigSet_t *cset)
3703 {
3704 RF_AutoConfig_t *ac;
3705
3706 ac = cset->ac;
3707 while(ac!=NULL) {
3708 /* Close the vp, and give it back */
3709 if (ac->vp) {
3710 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3711 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3712 vput(ac->vp);
3713 ac->vp = NULL;
3714 }
3715 ac = ac->next;
3716 }
3717 }
3718
3719
3720 void
3721 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3722 {
3723 RF_AutoConfig_t *ac;
3724 RF_AutoConfig_t *next_ac;
3725
3726 ac = cset->ac;
3727 while(ac!=NULL) {
3728 next_ac = ac->next;
3729 /* nuke the label */
3730 free(ac->clabel, M_RAIDFRAME);
3731 /* cleanup the config structure */
3732 free(ac, M_RAIDFRAME);
3733 /* "next.." */
3734 ac = next_ac;
3735 }
3736 /* and, finally, nuke the config set */
3737 free(cset, M_RAIDFRAME);
3738 }
3739
3740
/*
 * Fill in a component label from the current state of the RAID set.
 * The caller is responsible for writing the label out to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* set geometry */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout parameters */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	/* component size */
	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* let the parity map module record its state in the label */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3773
/*
 * Configure one autoconfigurable set.  Returns the raid softc on
 * success, NULL when out of memory or when rf_Configure() fails.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Prefer the label's last unit; walk upward until a unit that
	   is not already configured (sc_r.valid == 0) is found. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3847
3848 void
3849 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3850 {
3851 struct buf *bp;
3852 struct raid_softc *rs;
3853
3854 bp = (struct buf *)desc->bp;
3855 rs = desc->raidPtr->softc;
3856 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3857 (bp->b_flags & B_READ));
3858 }
3859
/*
 * Initialize a pool for RAIDframe structures of the given size:
 * xmin items are pre-allocated and set as the low water mark,
 * xmax is the high water mark.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3869
3870 /*
3871 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3872 * if there is IO pending and if that IO could possibly be done for a
3873 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3874 * otherwise.
3875 *
3876 */
3877
3878 int
3879 rf_buf_queue_check(RF_Raid_t *raidPtr)
3880 {
3881 struct raid_softc *rs = raidPtr->softc;
3882 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3883 /* there is work to do */
3884 return 0;
3885 }
3886 /* default is nothing to do */
3887 return 1;
3888 }
3889
3890 int
3891 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3892 {
3893 uint64_t numsecs;
3894 unsigned secsize;
3895 int error;
3896
3897 error = getdisksize(vp, &numsecs, &secsize);
3898 if (error == 0) {
3899 diskPtr->blockSize = secsize;
3900 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3901 diskPtr->partitionSize = numsecs;
3902 return 0;
3903 }
3904 return error;
3905 }
3906
/*
 * Autoconfiguration match hook: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3912
/*
 * Autoconfiguration attach hook: nothing to do here; the real setup
 * happens when a unit is configured.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3918
3919
3920 static int
3921 raid_detach(device_t self, int flags)
3922 {
3923 int error;
3924 struct raid_softc *rs = raidget(device_unit(self));
3925
3926 if (rs == NULL)
3927 return ENXIO;
3928
3929 if ((error = raidlock(rs)) != 0)
3930 return (error);
3931
3932 error = raid_detach_unlocked(rs);
3933
3934 raidunlock(rs);
3935
3936 /* XXXkd: raidput(rs) ??? */
3937
3938 return error;
3939 }
3940
/*
 * Publish a disk geometry for the RAID set.  The numbers are
 * synthetic (e.g. four "tracks" per column); only the total size and
 * the sector size are taken from the real configuration.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	/* fake geometry: one "track" holds the sectors of a data stripe */
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3955
3956 /*
3957 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3958 * We end up returning whatever error was returned by the first cache flush
3959 * that fails.
3960 */
3961
3962 int
3963 rf_sync_component_caches(RF_Raid_t *raidPtr)
3964 {
3965 int c, sparecol;
3966 int e,error;
3967 int force = 1;
3968
3969 error = 0;
3970 for (c = 0; c < raidPtr->numCol; c++) {
3971 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3972 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3973 &force, FWRITE, NOCRED);
3974 if (e) {
3975 if (e != ENODEV)
3976 printf("raid%d: cache flush to component %s failed.\n",
3977 raidPtr->raidid, raidPtr->Disks[c].devname);
3978 if (error == 0) {
3979 error = e;
3980 }
3981 }
3982 }
3983 }
3984
3985 for( c = 0; c < raidPtr->numSpare ; c++) {
3986 sparecol = raidPtr->numCol + c;
3987 /* Need to ensure that the reconstruct actually completed! */
3988 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3989 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3990 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3991 if (e) {
3992 if (e != ENODEV)
3993 printf("raid%d: cache flush to component %s failed.\n",
3994 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3995 if (error == 0) {
3996 error = e;
3997 }
3998 }
3999 }
4000 }
4001 return error;
4002 }
4003