rf_netbsdkintf.c revision 1.312.2.4 1 /* $NetBSD: rf_netbsdkintf.c,v 1.312.2.4 2014/12/22 02:19:32 msaitoh Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.312.2.4 2014/12/22 02:19:32 msaitoh Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
/*
 * Block device switch for /dev/raidN block nodes.  All disk-style
 * entry points are served by the raid* functions declared above;
 * D_DISK marks the device as a disk for the upper layers.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
217
/*
 * Character (raw) device switch for /dev/rraidN nodes.  Raw reads and
 * writes go through physio(9) via raidread/raidwrite; terminal-style
 * operations are stubbed out with the no-op handlers.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
232
/* Hooks used by the generic disk(9) framework to push I/O into RAIDframe. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/*
 * Per-unit software state for one configured RAID set.  One of these
 * exists for every raidN unit, linked on the global `raids' list and
 * looked up by unit number via raidget().
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raidN unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* the RAIDframe state proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /*
263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
264 * Be aware that large numbers can allow the driver to consume a lot of
265 * kernel memory, especially on writes, and in degraded mode reads.
266 *
267 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
268 * a single 64K write will typically require 64K for the old data,
269 * 64K for the old parity, and 64K for the new parity, for a total
270 * of 192K (if the parity buffer is not re-used immediately).
271 * Even it if is used immediately, that's still 128K, which when multiplied
272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
273 *
274 * Now in degraded mode, for example, a 64K read on the above setup may
275 * require data reconstruction, which will require *all* of the 4 remaining
276 * disks to participate -- 4 * 32K/disk == 128K again.
277 */
278
279 #ifndef RAIDOUTSTANDING
280 #define RAIDOUTSTANDING 6
281 #endif
282
283 #define RAIDLABELDEV(dev) \
284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
285
286 /* declared here, and made public, for the benefit of KVM stuff.. */
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 if (sc == NULL) {
342 #ifdef DIAGNOSTIC
343 printf("%s: out of memory\n", __func__);
344 #endif
345 return NULL;
346 }
347 sc->sc_unit = unit;
348 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
349 return sc;
350 }
351
/*
 * Release all resources held by a raid_softc allocated with
 * raidcreate().  The softc must already be off the global list.
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
357
358 static struct raid_softc *
359 raidget(int unit) {
360 struct raid_softc *sc;
361 if (unit < 0) {
362 #ifdef DIAGNOSTIC
363 panic("%s: unit %d!", __func__, unit);
364 #endif
365 return NULL;
366 }
367 mutex_enter(&raid_lock);
368 LIST_FOREACH(sc, &raids, sc_link) {
369 if (sc->sc_unit == unit) {
370 mutex_exit(&raid_lock);
371 return sc;
372 }
373 }
374 mutex_exit(&raid_lock);
375 if ((sc = raidcreate(unit)) == NULL)
376 return NULL;
377 mutex_enter(&raid_lock);
378 LIST_INSERT_HEAD(&raids, sc, sc_link);
379 mutex_exit(&raid_lock);
380 return sc;
381 }
382
/*
 * Unlink `sc' from the global softc list and free it.  The caller
 * must guarantee no other references to the softc remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
390
/*
 * Pseudo-device attach routine, called once at boot with the number
 * of units requested (`num' is unused here; units are created lazily
 * by raidget()).  Initializes global state, boots the RAIDframe core,
 * registers the autoconf attachment, and schedules autoconfiguration
 * of RAID sets to run after all real hardware has attached.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table plumbing used only by declustered parity layouts. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
423
424 int
425 rf_autoconfig(device_t self)
426 {
427 RF_AutoConfig_t *ac_list;
428 RF_ConfigSet_t *config_sets;
429
430 if (!raidautoconfig || raidautoconfigdone == true)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfigdone = true;
435
436 #ifdef __HAVE_CPU_BOOTCONF
437 /*
438 * 0. find the boot device if needed first so we can use it later
439 * this needs to be done before we autoconfigure any raid sets,
440 * because if we use wedges we are not going to be able to open
441 * the boot device later
442 */
443 if (booted_device == NULL)
444 cpu_bootconf();
445 #endif
446 /* 1. locate all RAID components on the system */
447 aprint_debug("Searching for RAID components...\n");
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set and configure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return 1;
460 }
461
462 static int
463 rf_containsboot(RF_Raid_t *r, device_t bdv) {
464 const char *bootname = device_xname(bdv);
465 size_t len = strlen(bootname);
466
467 for (int col = 0; col < r->numCol; col++) {
468 const char *devname = r->Disks[col].devname;
469 devname += sizeof("/dev/") - 1;
470 if (strncmp(devname, "dk", 2) == 0) {
471 const char *parent =
472 dkwedge_get_parent_name(r->Disks[col].dev);
473 if (parent != NULL)
474 devname = parent;
475 }
476 if (strncmp(devname, bootname, len) == 0) {
477 struct raid_softc *sc = r->softc;
478 aprint_debug("raid%d includes boot device %s\n",
479 sc->sc_unit, devname);
480 return 1;
481 }
482 }
483 return 0;
484 }
485
/*
 * Walk the list of discovered configuration sets, configure the ones
 * that are complete and marked autoconfigurable, and then decide
 * whether a configured RAID set should become the root device.  All
 * sets are cleaned up here regardless of whether they configured.
 * Called from rf_autoconfig() during the config finalizer pass.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset, so save the link now. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				/* Remember the last rootable set we saw. */
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* Override root if nothing was booted, or if the set is
		   forced root, or if it actually contains the boot device. */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Narrow the candidates to sets containing the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
596
597
/*
 * d_psize entry point: return the size, in DEV_BSIZE blocks, of the
 * partition of `dev' -- but only if that partition is of type FS_SWAP
 * (it is used to size dump/swap areas).  Returns -1 on any error or
 * if the partition is not swap.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* If the partition is not already open, open it transiently so
	   the disklabel is valid; the matching close happens below. */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		/* Convert from label sector units to DEV_BSIZE units. */
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}
630
/*
 * d_dump entry point: write `size' bytes of crash-dump data from `va'
 * at block `blkno' of the partition of `dev'.  Only RAID 1 sets are
 * supported (one data + one parity column); the dump is routed to a
 * single live component -- preferably the master, else a spare that
 * replaced the master, else the slave, else a spare of the slave.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dumps must be in whole DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Reject dumps that would run off the end of the set. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * Open entry point for both block and character nodes.  Validates the
 * partition, refreshes the disklabel on the first open of an
 * initialized unit, records the open in the appropriate openmask, and
 * marks all components dirty on the first open so that an unclean
 * shutdown is detectable.  Returns with the unit unlocked.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of an initialized, wedge-less unit: (re)read the
	   disklabel so partition checks below see current data. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	/* NOTE: "bad" is also the normal exit path (error == 0). */
bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * Close entry point.  Clears the open bit for this partition; on the
 * last close of an initialized unit, writes out final (clean)
 * component labels.  Always returns 0 once the unit lock is obtained.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
901
/*
 * Strategy routine: validate and bounds-check `bp', then queue it on
 * the unit's buffer queue and wake the RAIDframe I/O thread via
 * iodone_cv.  On any error, the buffer is completed immediately with
 * b_error set and b_resid equal to the full count (nothing moved).
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfers succeed trivially (b_error stays 0). */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sector units) to DEV_BSIZE
		   units, shifting in whichever direction applies. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	/* The queue is consumed by the I/O thread, which waits on
	   iodone_cv; queue and signal under iodone_lock. */
	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
972 /* ARGSUSED */
973 int
974 raidread(dev_t dev, struct uio *uio, int flags)
975 {
976 int unit = raidunit(dev);
977 struct raid_softc *rs;
978
979 if ((rs = raidget(unit)) == NULL)
980 return ENXIO;
981
982 if ((rs->sc_flags & RAIDF_INITED) == 0)
983 return (ENXIO);
984
985 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
986
987 }
988 /* ARGSUSED */
989 int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 int unit = raidunit(dev);
993 struct raid_softc *rs;
994
995 if ((rs = raidget(unit)) == NULL)
996 return ENXIO;
997
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000
1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002
1003 }
1004
/*
 * Tear down a RAID unit: shut down the RAIDframe core (if it was
 * initialized) and detach/destroy the associated disk(9) structures.
 * Caller must hold the unit lock.  Returns EBUSY if any partition is
 * still open, or the error from rf_Shutdown().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		/* Shutdown succeeded: clear both state flags. */
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1036
1037 int
1038 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1039 {
1040 int unit = raidunit(dev);
1041 int error = 0;
1042 int part, pmask, s;
1043 cfdata_t cf;
1044 struct raid_softc *rs;
1045 RF_Config_t *k_cfg, *u_cfg;
1046 RF_Raid_t *raidPtr;
1047 RF_RaidDisk_t *diskPtr;
1048 RF_AccTotals_t *totals;
1049 RF_DeviceConfig_t *d_cfg, **ucfgp;
1050 u_char *specific_buf;
1051 int retcode = 0;
1052 int column;
1053 /* int raidid; */
1054 struct rf_recon_req *rrcopy, *rr;
1055 RF_ComponentLabel_t *clabel;
1056 RF_ComponentLabel_t *ci_label;
1057 RF_ComponentLabel_t **clabel_ptr;
1058 RF_SingleComponent_t *sparePtr,*componentPtr;
1059 RF_SingleComponent_t component;
1060 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1061 int i, j, d;
1062 #ifdef __HAVE_OLD_DISKLABEL
1063 struct disklabel newlabel;
1064 #endif
1065 struct dkwedge_info *dkw;
1066
1067 if ((rs = raidget(unit)) == NULL)
1068 return ENXIO;
1069 raidPtr = &rs->sc_r;
1070
1071 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1072 (int) DISKPART(dev), (int) unit, cmd));
1073
1074 /* Must be open for writes for these commands... */
1075 switch (cmd) {
1076 #ifdef DIOCGSECTORSIZE
1077 case DIOCGSECTORSIZE:
1078 *(u_int *)data = raidPtr->bytesPerSector;
1079 return 0;
1080 case DIOCGMEDIASIZE:
1081 *(off_t *)data =
1082 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1083 return 0;
1084 #endif
1085 case DIOCSDINFO:
1086 case DIOCWDINFO:
1087 #ifdef __HAVE_OLD_DISKLABEL
1088 case ODIOCWDINFO:
1089 case ODIOCSDINFO:
1090 #endif
1091 case DIOCWLABEL:
1092 case DIOCAWEDGE:
1093 case DIOCDWEDGE:
1094 case DIOCMWEDGES:
1095 case DIOCSSTRATEGY:
1096 if ((flag & FWRITE) == 0)
1097 return (EBADF);
1098 }
1099
1100 /* Must be initialized for these... */
1101 switch (cmd) {
1102 case DIOCGDINFO:
1103 case DIOCSDINFO:
1104 case DIOCWDINFO:
1105 #ifdef __HAVE_OLD_DISKLABEL
1106 case ODIOCGDINFO:
1107 case ODIOCWDINFO:
1108 case ODIOCSDINFO:
1109 case ODIOCGDEFLABEL:
1110 #endif
1111 case DIOCGPART:
1112 case DIOCWLABEL:
1113 case DIOCGDEFLABEL:
1114 case DIOCAWEDGE:
1115 case DIOCDWEDGE:
1116 case DIOCLWEDGES:
1117 case DIOCMWEDGES:
1118 case DIOCCACHESYNC:
1119 case RAIDFRAME_SHUTDOWN:
1120 case RAIDFRAME_REWRITEPARITY:
1121 case RAIDFRAME_GET_INFO:
1122 case RAIDFRAME_RESET_ACCTOTALS:
1123 case RAIDFRAME_GET_ACCTOTALS:
1124 case RAIDFRAME_KEEP_ACCTOTALS:
1125 case RAIDFRAME_GET_SIZE:
1126 case RAIDFRAME_FAIL_DISK:
1127 case RAIDFRAME_COPYBACK:
1128 case RAIDFRAME_CHECK_RECON_STATUS:
1129 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1130 case RAIDFRAME_GET_COMPONENT_LABEL:
1131 case RAIDFRAME_SET_COMPONENT_LABEL:
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 case RAIDFRAME_REMOVE_HOT_SPARE:
1134 case RAIDFRAME_INIT_LABELS:
1135 case RAIDFRAME_REBUILD_IN_PLACE:
1136 case RAIDFRAME_CHECK_PARITY:
1137 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1138 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1139 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1140 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1141 case RAIDFRAME_SET_AUTOCONFIG:
1142 case RAIDFRAME_SET_ROOT:
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1145 case RAIDFRAME_PARITYMAP_STATUS:
1146 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1147 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1148 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1149 case DIOCGSTRATEGY:
1150 case DIOCSSTRATEGY:
1151 if ((rs->sc_flags & RAIDF_INITED) == 0)
1152 return (ENXIO);
1153 }
1154
1155 switch (cmd) {
1156 #ifdef COMPAT_50
1157 case RAIDFRAME_GET_INFO50:
1158 return rf_get_info50(raidPtr, data);
1159
1160 case RAIDFRAME_CONFIGURE50:
1161 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1162 return retcode;
1163 goto config;
1164 #endif
1165 /* configure the system */
1166 case RAIDFRAME_CONFIGURE:
1167
1168 if (raidPtr->valid) {
1169 /* There is a valid RAID set running on this unit! */
1170 printf("raid%d: Device already configured!\n",unit);
1171 return(EINVAL);
1172 }
1173
1174 /* copy-in the configuration information */
1175 /* data points to a pointer to the configuration structure */
1176
1177 u_cfg = *((RF_Config_t **) data);
1178 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1179 if (k_cfg == NULL) {
1180 return (ENOMEM);
1181 }
1182 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1183 if (retcode) {
1184 RF_Free(k_cfg, sizeof(RF_Config_t));
1185 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1186 retcode));
1187 return (retcode);
1188 }
1189 goto config;
1190 config:
1191 /* allocate a buffer for the layout-specific data, and copy it
1192 * in */
1193 if (k_cfg->layoutSpecificSize) {
1194 if (k_cfg->layoutSpecificSize > 10000) {
1195 /* sanity check */
1196 RF_Free(k_cfg, sizeof(RF_Config_t));
1197 return (EINVAL);
1198 }
1199 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1200 (u_char *));
1201 if (specific_buf == NULL) {
1202 RF_Free(k_cfg, sizeof(RF_Config_t));
1203 return (ENOMEM);
1204 }
1205 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1206 k_cfg->layoutSpecificSize);
1207 if (retcode) {
1208 RF_Free(k_cfg, sizeof(RF_Config_t));
1209 RF_Free(specific_buf,
1210 k_cfg->layoutSpecificSize);
1211 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1212 retcode));
1213 return (retcode);
1214 }
1215 } else
1216 specific_buf = NULL;
1217 k_cfg->layoutSpecific = specific_buf;
1218
1219 /* should do some kind of sanity check on the configuration.
1220 * Store the sum of all the bytes in the last byte? */
1221
1222 /* configure the system */
1223
1224 /*
1225 * Clear the entire RAID descriptor, just to make sure
1226 * there is no stale data left in the case of a
1227 * reconfiguration
1228 */
1229 memset(raidPtr, 0, sizeof(*raidPtr));
1230 raidPtr->softc = rs;
1231 raidPtr->raidid = unit;
1232
1233 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1234
1235 if (retcode == 0) {
1236
1237 /* allow this many simultaneous IO's to
1238 this RAID device */
1239 raidPtr->openings = RAIDOUTSTANDING;
1240
1241 raidinit(rs);
1242 rf_markalldirty(raidPtr);
1243 }
1244 /* free the buffers. No return code here. */
1245 if (k_cfg->layoutSpecificSize) {
1246 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1247 }
1248 RF_Free(k_cfg, sizeof(RF_Config_t));
1249
1250 return (retcode);
1251
1252 /* shutdown the system */
1253 case RAIDFRAME_SHUTDOWN:
1254
1255 part = DISKPART(dev);
1256 pmask = (1 << part);
1257
1258 if ((error = raidlock(rs)) != 0)
1259 return (error);
1260
1261 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1262 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1263 (rs->sc_dkdev.dk_copenmask & pmask)))
1264 retcode = EBUSY;
1265 else {
1266 rs->sc_flags |= RAIDF_SHUTDOWN;
1267 rs->sc_dkdev.dk_copenmask &= ~pmask;
1268 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1269 rs->sc_dkdev.dk_openmask &= ~pmask;
1270 retcode = 0;
1271 }
1272
1273 raidunlock(rs);
1274
1275 if (retcode != 0)
1276 return retcode;
1277
1278 /* free the pseudo device attach bits */
1279
1280 cf = device_cfdata(rs->sc_dev);
1281 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1282 free(cf, M_RAIDFRAME);
1283
1284 return (retcode);
1285 case RAIDFRAME_GET_COMPONENT_LABEL:
1286 clabel_ptr = (RF_ComponentLabel_t **) data;
1287 /* need to read the component label for the disk indicated
1288 by row,column in clabel */
1289
1290 /*
1291 * Perhaps there should be an option to skip the in-core
1292 * copy and hit the disk, as with disklabel(8).
1293 */
1294 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1295
1296 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1297
1298 if (retcode) {
1299 RF_Free(clabel, sizeof(*clabel));
1300 return retcode;
1301 }
1302
1303 clabel->row = 0; /* Don't allow looking at anything else.*/
1304
1305 column = clabel->column;
1306
1307 if ((column < 0) || (column >= raidPtr->numCol +
1308 raidPtr->numSpare)) {
1309 RF_Free(clabel, sizeof(*clabel));
1310 return EINVAL;
1311 }
1312
1313 RF_Free(clabel, sizeof(*clabel));
1314
1315 clabel = raidget_component_label(raidPtr, column);
1316
1317 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1318
1319 #if 0
1320 case RAIDFRAME_SET_COMPONENT_LABEL:
1321 clabel = (RF_ComponentLabel_t *) data;
1322
1323 /* XXX check the label for valid stuff... */
1324 /* Note that some things *should not* get modified --
1325 the user should be re-initing the labels instead of
1326 trying to patch things.
1327 */
1328
1329 raidid = raidPtr->raidid;
1330 #ifdef DEBUG
1331 printf("raid%d: Got component label:\n", raidid);
1332 printf("raid%d: Version: %d\n", raidid, clabel->version);
1333 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1334 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1335 printf("raid%d: Column: %d\n", raidid, clabel->column);
1336 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1337 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1338 printf("raid%d: Status: %d\n", raidid, clabel->status);
1339 #endif
1340 clabel->row = 0;
1341 column = clabel->column;
1342
1343 if ((column < 0) || (column >= raidPtr->numCol)) {
1344 return(EINVAL);
1345 }
1346
1347 /* XXX this isn't allowed to do anything for now :-) */
1348
1349 /* XXX and before it is, we need to fill in the rest
1350 of the fields!?!?!?! */
1351 memcpy(raidget_component_label(raidPtr, column),
1352 clabel, sizeof(*clabel));
1353 raidflush_component_label(raidPtr, column);
1354 return (0);
1355 #endif
1356
1357 case RAIDFRAME_INIT_LABELS:
1358 clabel = (RF_ComponentLabel_t *) data;
1359 /*
1360 we only want the serial number from
1361 the above. We get all the rest of the information
1362 from the config that was used to create this RAID
1363 set.
1364 */
1365
1366 raidPtr->serial_number = clabel->serial_number;
1367
1368 for(column=0;column<raidPtr->numCol;column++) {
1369 diskPtr = &raidPtr->Disks[column];
1370 if (!RF_DEAD_DISK(diskPtr->status)) {
1371 ci_label = raidget_component_label(raidPtr,
1372 column);
1373 /* Zeroing this is important. */
1374 memset(ci_label, 0, sizeof(*ci_label));
1375 raid_init_component_label(raidPtr, ci_label);
1376 ci_label->serial_number =
1377 raidPtr->serial_number;
1378 ci_label->row = 0; /* we dont' pretend to support more */
1379 rf_component_label_set_partitionsize(ci_label,
1380 diskPtr->partitionSize);
1381 ci_label->column = column;
1382 raidflush_component_label(raidPtr, column);
1383 }
1384 /* XXXjld what about the spares? */
1385 }
1386
1387 return (retcode);
1388 case RAIDFRAME_SET_AUTOCONFIG:
1389 d = rf_set_autoconfig(raidPtr, *(int *) data);
1390 printf("raid%d: New autoconfig value is: %d\n",
1391 raidPtr->raidid, d);
1392 *(int *) data = d;
1393 return (retcode);
1394
1395 case RAIDFRAME_SET_ROOT:
1396 d = rf_set_rootpartition(raidPtr, *(int *) data);
1397 printf("raid%d: New rootpartition value is: %d\n",
1398 raidPtr->raidid, d);
1399 *(int *) data = d;
1400 return (retcode);
1401
1402 /* initialize all parity */
1403 case RAIDFRAME_REWRITEPARITY:
1404
1405 if (raidPtr->Layout.map->faultsTolerated == 0) {
1406 /* Parity for RAID 0 is trivially correct */
1407 raidPtr->parity_good = RF_RAID_CLEAN;
1408 return(0);
1409 }
1410
1411 if (raidPtr->parity_rewrite_in_progress == 1) {
1412 /* Re-write is already in progress! */
1413 return(EINVAL);
1414 }
1415
1416 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1417 rf_RewriteParityThread,
1418 raidPtr,"raid_parity");
1419 return (retcode);
1420
1421
1422 case RAIDFRAME_ADD_HOT_SPARE:
1423 sparePtr = (RF_SingleComponent_t *) data;
1424 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1425 retcode = rf_add_hot_spare(raidPtr, &component);
1426 return(retcode);
1427
1428 case RAIDFRAME_REMOVE_HOT_SPARE:
1429 return(retcode);
1430
1431 case RAIDFRAME_DELETE_COMPONENT:
1432 componentPtr = (RF_SingleComponent_t *)data;
1433 memcpy( &component, componentPtr,
1434 sizeof(RF_SingleComponent_t));
1435 retcode = rf_delete_component(raidPtr, &component);
1436 return(retcode);
1437
1438 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1439 componentPtr = (RF_SingleComponent_t *)data;
1440 memcpy( &component, componentPtr,
1441 sizeof(RF_SingleComponent_t));
1442 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1443 return(retcode);
1444
1445 case RAIDFRAME_REBUILD_IN_PLACE:
1446
1447 if (raidPtr->Layout.map->faultsTolerated == 0) {
1448 /* Can't do this on a RAID 0!! */
1449 return(EINVAL);
1450 }
1451
1452 if (raidPtr->recon_in_progress == 1) {
1453 /* a reconstruct is already in progress! */
1454 return(EINVAL);
1455 }
1456
1457 componentPtr = (RF_SingleComponent_t *) data;
1458 memcpy( &component, componentPtr,
1459 sizeof(RF_SingleComponent_t));
1460 component.row = 0; /* we don't support any more */
1461 column = component.column;
1462
1463 if ((column < 0) || (column >= raidPtr->numCol)) {
1464 return(EINVAL);
1465 }
1466
1467 rf_lock_mutex2(raidPtr->mutex);
1468 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1469 (raidPtr->numFailures > 0)) {
1470 /* XXX 0 above shouldn't be constant!!! */
1471 /* some component other than this has failed.
1472 Let's not make things worse than they already
1473 are... */
1474 printf("raid%d: Unable to reconstruct to disk at:\n",
1475 raidPtr->raidid);
1476 printf("raid%d: Col: %d Too many failures.\n",
1477 raidPtr->raidid, column);
1478 rf_unlock_mutex2(raidPtr->mutex);
1479 return (EINVAL);
1480 }
1481 if (raidPtr->Disks[column].status ==
1482 rf_ds_reconstructing) {
1483 printf("raid%d: Unable to reconstruct to disk at:\n",
1484 raidPtr->raidid);
1485 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1486
1487 rf_unlock_mutex2(raidPtr->mutex);
1488 return (EINVAL);
1489 }
1490 if (raidPtr->Disks[column].status == rf_ds_spared) {
1491 rf_unlock_mutex2(raidPtr->mutex);
1492 return (EINVAL);
1493 }
1494 rf_unlock_mutex2(raidPtr->mutex);
1495
1496 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1497 if (rrcopy == NULL)
1498 return(ENOMEM);
1499
1500 rrcopy->raidPtr = (void *) raidPtr;
1501 rrcopy->col = column;
1502
1503 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1504 rf_ReconstructInPlaceThread,
1505 rrcopy,"raid_reconip");
1506 return(retcode);
1507
1508 case RAIDFRAME_GET_INFO:
1509 if (!raidPtr->valid)
1510 return (ENODEV);
1511 ucfgp = (RF_DeviceConfig_t **) data;
1512 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1513 (RF_DeviceConfig_t *));
1514 if (d_cfg == NULL)
1515 return (ENOMEM);
1516 d_cfg->rows = 1; /* there is only 1 row now */
1517 d_cfg->cols = raidPtr->numCol;
1518 d_cfg->ndevs = raidPtr->numCol;
1519 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1520 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1521 return (ENOMEM);
1522 }
1523 d_cfg->nspares = raidPtr->numSpare;
1524 if (d_cfg->nspares >= RF_MAX_DISKS) {
1525 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1526 return (ENOMEM);
1527 }
1528 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1529 d = 0;
1530 for (j = 0; j < d_cfg->cols; j++) {
1531 d_cfg->devs[d] = raidPtr->Disks[j];
1532 d++;
1533 }
1534 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1535 d_cfg->spares[i] = raidPtr->Disks[j];
1536 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1537 /* XXX: raidctl(8) expects to see this as a used spare */
1538 d_cfg->spares[i].status = rf_ds_used_spare;
1539 }
1540 }
1541 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1542 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1543
1544 return (retcode);
1545
1546 case RAIDFRAME_CHECK_PARITY:
1547 *(int *) data = raidPtr->parity_good;
1548 return (0);
1549
1550 case RAIDFRAME_PARITYMAP_STATUS:
1551 if (rf_paritymap_ineligible(raidPtr))
1552 return EINVAL;
1553 rf_paritymap_status(raidPtr->parity_map,
1554 (struct rf_pmstat *)data);
1555 return 0;
1556
1557 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1558 if (rf_paritymap_ineligible(raidPtr))
1559 return EINVAL;
1560 if (raidPtr->parity_map == NULL)
1561 return ENOENT; /* ??? */
1562 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1563 (struct rf_pmparams *)data, 1))
1564 return EINVAL;
1565 return 0;
1566
1567 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1568 if (rf_paritymap_ineligible(raidPtr))
1569 return EINVAL;
1570 *(int *) data = rf_paritymap_get_disable(raidPtr);
1571 return 0;
1572
1573 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1574 if (rf_paritymap_ineligible(raidPtr))
1575 return EINVAL;
1576 rf_paritymap_set_disable(raidPtr, *(int *)data);
1577 /* XXX should errors be passed up? */
1578 return 0;
1579
1580 case RAIDFRAME_RESET_ACCTOTALS:
1581 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1582 return (0);
1583
1584 case RAIDFRAME_GET_ACCTOTALS:
1585 totals = (RF_AccTotals_t *) data;
1586 *totals = raidPtr->acc_totals;
1587 return (0);
1588
1589 case RAIDFRAME_KEEP_ACCTOTALS:
1590 raidPtr->keep_acc_totals = *(int *)data;
1591 return (0);
1592
1593 case RAIDFRAME_GET_SIZE:
1594 *(int *) data = raidPtr->totalSectors;
1595 return (0);
1596
1597 /* fail a disk & optionally start reconstruction */
1598 case RAIDFRAME_FAIL_DISK:
1599
1600 if (raidPtr->Layout.map->faultsTolerated == 0) {
1601 /* Can't do this on a RAID 0!! */
1602 return(EINVAL);
1603 }
1604
1605 rr = (struct rf_recon_req *) data;
1606 rr->row = 0;
1607 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1608 return (EINVAL);
1609
1610
1611 rf_lock_mutex2(raidPtr->mutex);
1612 if (raidPtr->status == rf_rs_reconstructing) {
1613 /* you can't fail a disk while we're reconstructing! */
1614 /* XXX wrong for RAID6 */
1615 rf_unlock_mutex2(raidPtr->mutex);
1616 return (EINVAL);
1617 }
1618 if ((raidPtr->Disks[rr->col].status ==
1619 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1620 /* some other component has failed. Let's not make
1621 things worse. XXX wrong for RAID6 */
1622 rf_unlock_mutex2(raidPtr->mutex);
1623 return (EINVAL);
1624 }
1625 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1626 /* Can't fail a spared disk! */
1627 rf_unlock_mutex2(raidPtr->mutex);
1628 return (EINVAL);
1629 }
1630 rf_unlock_mutex2(raidPtr->mutex);
1631
1632 /* make a copy of the recon request so that we don't rely on
1633 * the user's buffer */
1634 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1635 if (rrcopy == NULL)
1636 return(ENOMEM);
1637 memcpy(rrcopy, rr, sizeof(*rr));
1638 rrcopy->raidPtr = (void *) raidPtr;
1639
1640 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1641 rf_ReconThread,
1642 rrcopy,"raid_recon");
1643 return (0);
1644
1645 /* invoke a copyback operation after recon on whatever disk
1646 * needs it, if any */
1647 case RAIDFRAME_COPYBACK:
1648
1649 if (raidPtr->Layout.map->faultsTolerated == 0) {
1650 /* This makes no sense on a RAID 0!! */
1651 return(EINVAL);
1652 }
1653
1654 if (raidPtr->copyback_in_progress == 1) {
1655 /* Copyback is already in progress! */
1656 return(EINVAL);
1657 }
1658
1659 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1660 rf_CopybackThread,
1661 raidPtr,"raid_copyback");
1662 return (retcode);
1663
1664 /* return the percentage completion of reconstruction */
1665 case RAIDFRAME_CHECK_RECON_STATUS:
1666 if (raidPtr->Layout.map->faultsTolerated == 0) {
1667 /* This makes no sense on a RAID 0, so tell the
1668 user it's done. */
1669 *(int *) data = 100;
1670 return(0);
1671 }
1672 if (raidPtr->status != rf_rs_reconstructing)
1673 *(int *) data = 100;
1674 else {
1675 if (raidPtr->reconControl->numRUsTotal > 0) {
1676 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1677 } else {
1678 *(int *) data = 0;
1679 }
1680 }
1681 return (0);
1682 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1683 progressInfoPtr = (RF_ProgressInfo_t **) data;
1684 if (raidPtr->status != rf_rs_reconstructing) {
1685 progressInfo.remaining = 0;
1686 progressInfo.completed = 100;
1687 progressInfo.total = 100;
1688 } else {
1689 progressInfo.total =
1690 raidPtr->reconControl->numRUsTotal;
1691 progressInfo.completed =
1692 raidPtr->reconControl->numRUsComplete;
1693 progressInfo.remaining = progressInfo.total -
1694 progressInfo.completed;
1695 }
1696 retcode = copyout(&progressInfo, *progressInfoPtr,
1697 sizeof(RF_ProgressInfo_t));
1698 return (retcode);
1699
1700 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1701 if (raidPtr->Layout.map->faultsTolerated == 0) {
1702 /* This makes no sense on a RAID 0, so tell the
1703 user it's done. */
1704 *(int *) data = 100;
1705 return(0);
1706 }
1707 if (raidPtr->parity_rewrite_in_progress == 1) {
1708 *(int *) data = 100 *
1709 raidPtr->parity_rewrite_stripes_done /
1710 raidPtr->Layout.numStripe;
1711 } else {
1712 *(int *) data = 100;
1713 }
1714 return (0);
1715
1716 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1717 progressInfoPtr = (RF_ProgressInfo_t **) data;
1718 if (raidPtr->parity_rewrite_in_progress == 1) {
1719 progressInfo.total = raidPtr->Layout.numStripe;
1720 progressInfo.completed =
1721 raidPtr->parity_rewrite_stripes_done;
1722 progressInfo.remaining = progressInfo.total -
1723 progressInfo.completed;
1724 } else {
1725 progressInfo.remaining = 0;
1726 progressInfo.completed = 100;
1727 progressInfo.total = 100;
1728 }
1729 retcode = copyout(&progressInfo, *progressInfoPtr,
1730 sizeof(RF_ProgressInfo_t));
1731 return (retcode);
1732
1733 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1734 if (raidPtr->Layout.map->faultsTolerated == 0) {
1735 /* This makes no sense on a RAID 0 */
1736 *(int *) data = 100;
1737 return(0);
1738 }
1739 if (raidPtr->copyback_in_progress == 1) {
1740 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1741 raidPtr->Layout.numStripe;
1742 } else {
1743 *(int *) data = 100;
1744 }
1745 return (0);
1746
1747 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1748 progressInfoPtr = (RF_ProgressInfo_t **) data;
1749 if (raidPtr->copyback_in_progress == 1) {
1750 progressInfo.total = raidPtr->Layout.numStripe;
1751 progressInfo.completed =
1752 raidPtr->copyback_stripes_done;
1753 progressInfo.remaining = progressInfo.total -
1754 progressInfo.completed;
1755 } else {
1756 progressInfo.remaining = 0;
1757 progressInfo.completed = 100;
1758 progressInfo.total = 100;
1759 }
1760 retcode = copyout(&progressInfo, *progressInfoPtr,
1761 sizeof(RF_ProgressInfo_t));
1762 return (retcode);
1763
1764 /* the sparetable daemon calls this to wait for the kernel to
1765 * need a spare table. this ioctl does not return until a
1766 * spare table is needed. XXX -- calling mpsleep here in the
1767 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1768 * -- I should either compute the spare table in the kernel,
1769 * or have a different -- XXX XXX -- interface (a different
1770 * character device) for delivering the table -- XXX */
1771 #if 0
1772 case RAIDFRAME_SPARET_WAIT:
1773 rf_lock_mutex2(rf_sparet_wait_mutex);
1774 while (!rf_sparet_wait_queue)
1775 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1776 waitreq = rf_sparet_wait_queue;
1777 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1778 rf_unlock_mutex2(rf_sparet_wait_mutex);
1779
1780 /* structure assignment */
1781 *((RF_SparetWait_t *) data) = *waitreq;
1782
1783 RF_Free(waitreq, sizeof(*waitreq));
1784 return (0);
1785
1786 /* wakes up a process waiting on SPARET_WAIT and puts an error
1787 * code in it that will cause the dameon to exit */
1788 case RAIDFRAME_ABORT_SPARET_WAIT:
1789 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1790 waitreq->fcol = -1;
1791 rf_lock_mutex2(rf_sparet_wait_mutex);
1792 waitreq->next = rf_sparet_wait_queue;
1793 rf_sparet_wait_queue = waitreq;
1794 rf_broadcast_conf2(rf_sparet_wait_cv);
1795 rf_unlock_mutex2(rf_sparet_wait_mutex);
1796 return (0);
1797
1798 /* used by the spare table daemon to deliver a spare table
1799 * into the kernel */
1800 case RAIDFRAME_SEND_SPARET:
1801
1802 /* install the spare table */
1803 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1804
1805 /* respond to the requestor. the return status of the spare
1806 * table installation is passed in the "fcol" field */
1807 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1808 waitreq->fcol = retcode;
1809 rf_lock_mutex2(rf_sparet_wait_mutex);
1810 waitreq->next = rf_sparet_resp_queue;
1811 rf_sparet_resp_queue = waitreq;
1812 rf_broadcast_cond2(rf_sparet_resp_cv);
1813 rf_unlock_mutex2(rf_sparet_wait_mutex);
1814
1815 return (retcode);
1816 #endif
1817
1818 default:
1819 break; /* fall through to the os-specific code below */
1820
1821 }
1822
1823 if (!raidPtr->valid)
1824 return (EINVAL);
1825
1826 /*
1827 * Add support for "regular" device ioctls here.
1828 */
1829
1830 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1831 if (error != EPASSTHROUGH)
1832 return (error);
1833
1834 switch (cmd) {
1835 case DIOCGDINFO:
1836 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1837 break;
1838 #ifdef __HAVE_OLD_DISKLABEL
1839 case ODIOCGDINFO:
1840 newlabel = *(rs->sc_dkdev.dk_label);
1841 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1842 return ENOTTY;
1843 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1844 break;
1845 #endif
1846
1847 case DIOCGPART:
1848 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1849 ((struct partinfo *) data)->part =
1850 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1851 break;
1852
1853 case DIOCWDINFO:
1854 case DIOCSDINFO:
1855 #ifdef __HAVE_OLD_DISKLABEL
1856 case ODIOCWDINFO:
1857 case ODIOCSDINFO:
1858 #endif
1859 {
1860 struct disklabel *lp;
1861 #ifdef __HAVE_OLD_DISKLABEL
1862 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1863 memset(&newlabel, 0, sizeof newlabel);
1864 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1865 lp = &newlabel;
1866 } else
1867 #endif
1868 lp = (struct disklabel *)data;
1869
1870 if ((error = raidlock(rs)) != 0)
1871 return (error);
1872
1873 rs->sc_flags |= RAIDF_LABELLING;
1874
1875 error = setdisklabel(rs->sc_dkdev.dk_label,
1876 lp, 0, rs->sc_dkdev.dk_cpulabel);
1877 if (error == 0) {
1878 if (cmd == DIOCWDINFO
1879 #ifdef __HAVE_OLD_DISKLABEL
1880 || cmd == ODIOCWDINFO
1881 #endif
1882 )
1883 error = writedisklabel(RAIDLABELDEV(dev),
1884 raidstrategy, rs->sc_dkdev.dk_label,
1885 rs->sc_dkdev.dk_cpulabel);
1886 }
1887 rs->sc_flags &= ~RAIDF_LABELLING;
1888
1889 raidunlock(rs);
1890
1891 if (error)
1892 return (error);
1893 break;
1894 }
1895
1896 case DIOCWLABEL:
1897 if (*(int *) data != 0)
1898 rs->sc_flags |= RAIDF_WLABEL;
1899 else
1900 rs->sc_flags &= ~RAIDF_WLABEL;
1901 break;
1902
1903 case DIOCGDEFLABEL:
1904 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1905 break;
1906
1907 #ifdef __HAVE_OLD_DISKLABEL
1908 case ODIOCGDEFLABEL:
1909 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1910 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1911 return ENOTTY;
1912 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1913 break;
1914 #endif
1915
1916 case DIOCAWEDGE:
1917 case DIOCDWEDGE:
1918 dkw = (void *)data;
1919
1920 /* If the ioctl happens here, the parent is us. */
1921 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1922 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1923
1924 case DIOCLWEDGES:
1925 return dkwedge_list(&rs->sc_dkdev,
1926 (struct dkwedge_list *)data, l);
1927 case DIOCMWEDGES:
1928 dkwedge_discover(&rs->sc_dkdev);
1929 return 0;
1930 case DIOCCACHESYNC:
1931 return rf_sync_component_caches(raidPtr);
1932
1933 case DIOCGSTRATEGY:
1934 {
1935 struct disk_strategy *dks = (void *)data;
1936
1937 s = splbio();
1938 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1939 sizeof(dks->dks_name));
1940 splx(s);
1941 dks->dks_paramlen = 0;
1942
1943 return 0;
1944 }
1945
1946 case DIOCSSTRATEGY:
1947 {
1948 struct disk_strategy *dks = (void *)data;
1949 struct bufq_state *new;
1950 struct bufq_state *old;
1951
1952 if (dks->dks_param != NULL) {
1953 return EINVAL;
1954 }
1955 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1956 error = bufq_alloc(&new, dks->dks_name,
1957 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1958 if (error) {
1959 return error;
1960 }
1961 s = splbio();
1962 old = rs->buf_queue;
1963 bufq_move(new, old);
1964 rs->buf_queue = new;
1965 splx(s);
1966 bufq_free(old);
1967
1968 return 0;
1969 }
1970
1971 default:
1972 retcode = ENOTTY;
1973 }
1974 return (retcode);
1975
1976 }
1977
1978
1979 /* raidinit -- complete the rest of the initialization for the
1980 RAIDframe device. */
1981
1982
1983 static void
1984 raidinit(struct raid_softc *rs)
1985 {
1986 cfdata_t cf;
1987 int unit;
1988 RF_Raid_t *raidPtr = &rs->sc_r;
1989
1990 unit = raidPtr->raidid;
1991
1992
1993 /* XXX should check return code first... */
1994 rs->sc_flags |= RAIDF_INITED;
1995
1996 /* XXX doesn't check bounds. */
1997 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1998
1999 /* attach the pseudo device */
2000 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
2001 cf->cf_name = raid_cd.cd_name;
2002 cf->cf_atname = raid_cd.cd_name;
2003 cf->cf_unit = unit;
2004 cf->cf_fstate = FSTATE_STAR;
2005
2006 rs->sc_dev = config_attach_pseudo(cf);
2007
2008 if (rs->sc_dev == NULL) {
2009 printf("raid%d: config_attach_pseudo failed\n",
2010 raidPtr->raidid);
2011 rs->sc_flags &= ~RAIDF_INITED;
2012 free(cf, M_RAIDFRAME);
2013 return;
2014 }
2015
2016 /* disk_attach actually creates space for the CPU disklabel, among
2017 * other things, so it's critical to call this *BEFORE* we try putzing
2018 * with disklabels. */
2019
2020 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2021 disk_attach(&rs->sc_dkdev);
2022 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
2023
2024 /* XXX There may be a weird interaction here between this, and
2025 * protectedSectors, as used in RAIDframe. */
2026
2027 rs->sc_size = raidPtr->totalSectors;
2028
2029 dkwedge_discover(&rs->sc_dkdev);
2030
2031 rf_set_geometry(rs, raidPtr);
2032
2033 }
2034 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2035 /* wake up the daemon & tell it to get us a spare table
2036 * XXX
2037 * the entries in the queues should be tagged with the raidPtr
2038 * so that in the extremely rare case that two recons happen at once,
2039 * we know for which device were requesting a spare table
2040 * XXX
2041 *
2042 * XXX This code is not currently used. GO
2043 */
2044 int
2045 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2046 {
2047 int retcode;
2048
2049 rf_lock_mutex2(rf_sparet_wait_mutex);
2050 req->next = rf_sparet_wait_queue;
2051 rf_sparet_wait_queue = req;
2052 rf_broadcast_cond2(rf_sparet_wait_cv);
2053
2054 /* mpsleep unlocks the mutex */
2055 while (!rf_sparet_resp_queue) {
2056 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2057 }
2058 req = rf_sparet_resp_queue;
2059 rf_sparet_resp_queue = req->next;
2060 rf_unlock_mutex2(rf_sparet_wait_mutex);
2061
2062 retcode = req->fcol;
2063 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2064 * alloc'd */
2065 return (retcode);
2066 }
2067 #endif
2068
2069 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2070 * bp & passes it down.
2071 * any calls originating in the kernel must use non-blocking I/O
2072 * do some extra sanity checking to return "appropriate" error values for
2073 * certain conditions (to make some standard utilities work)
2074 *
2075 * Formerly known as: rf_DoAccessKernel
2076 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Label update must run unlocked; drop and retake the mutex
		 * around it. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released immediately after the openings check. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NOTE(review): returns with the mutex released,
			 * unlike the normal loop exit below -- callers
			 * appear to expect the lock dropped either way. */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sector units before
		 * adding the partition offset. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this branch unconditionally;
		 * presumably db1_printf compiles away unless debugging is
		 * enabled -- confirm intent. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past end of device; the "sum < x" terms
		 * catch arithmetic wraparound of the unsigned sum. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one of the concurrent-I/O slots. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2194
2195
2196
2197
2198 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2199
/*
 * rf_DispatchKernelIO: hand one RAIDframe disk-queue request to the
 * underlying component block device.  Per the comment above, the disk
 * queue should be locked on entry; the queue mutex is dropped around
 * bdev_strategy() below because that call can block.
 *
 * Completion is reported asynchronously through KernelWakeupFunc(),
 * which is installed as the buf's b_iodone callback by InitBP().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* No actual I/O: complete immediately via the normal
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf to address the component device, with
		 * KernelWakeupFunc as the iodone callback and req as its
		 * private cookie. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2273 /* this is the callback function associated with a I/O invoked from
2274 kernel code.
2275 */
/*
 * KernelWakeupFunc: biodone callback for I/Os issued by
 * rf_DispatchKernelIO (installed as bp->b_iodone by InitBP).
 *
 * Records the I/O completion status in the request, marks the component
 * failed on error (if doing so would not exceed the set's fault
 * tolerance), appends the request to the raid set's "iodone" queue and
 * signals the raidio thread to process it.  All of that happens under
 * the raid set's iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/rf_DispatchKernelIO(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2341
2342
2343 /*
2344 * initialize a buf structure for doing an I/O in the kernel.
2345 */
2346 static void
2347 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2348 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2349 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2350 struct proc *b_proc)
2351 {
2352 /* bp->b_flags = B_PHYS | rw_flag; */
2353 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2354 bp->b_oflags = 0;
2355 bp->b_cflags = 0;
2356 bp->b_bcount = numSect << logBytesPerSector;
2357 bp->b_bufsize = bp->b_bcount;
2358 bp->b_error = 0;
2359 bp->b_dev = dev;
2360 bp->b_data = bf;
2361 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2362 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2363 if (bp->b_bcount == 0) {
2364 panic("bp->b_bcount is zero in InitBP!!");
2365 }
2366 bp->b_proc = b_proc;
2367 bp->b_iodone = cbFunc;
2368 bp->b_private = cbArg;
2369 }
2370
2371 static void
2372 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2373 struct disklabel *lp)
2374 {
2375 memset(lp, 0, sizeof(*lp));
2376
2377 /* fabricate a label... */
2378 if (raidPtr->totalSectors > UINT32_MAX)
2379 lp->d_secperunit = UINT32_MAX;
2380 else
2381 lp->d_secperunit = raidPtr->totalSectors;
2382 lp->d_secsize = raidPtr->bytesPerSector;
2383 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2384 lp->d_ntracks = 4 * raidPtr->numCol;
2385 lp->d_ncylinders = raidPtr->totalSectors /
2386 (lp->d_nsectors * lp->d_ntracks);
2387 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2388
2389 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2390 lp->d_type = DTYPE_RAID;
2391 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2392 lp->d_rpm = 3600;
2393 lp->d_interleave = 1;
2394 lp->d_flags = 0;
2395
2396 lp->d_partitions[RAW_PART].p_offset = 0;
2397 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2398 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2399 lp->d_npartitions = RAW_PART + 1;
2400
2401 lp->d_magic = DISKMAGIC;
2402 lp->d_magic2 = DISKMAGIC;
2403 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2404
2405 }
2406 /*
2407 * Read the disklabel from the raid device. If one is not present, fake one
2408 * up.
2409 */
/*
 * raidgetdisklabel: read the disklabel from the raid device given by dev.
 * A default label is fabricated first; if readdisklabel() finds no label
 * on the device, raidmakedisklabel() keeps the fabricated one.  If a
 * label is found, it is sanity-checked against the raid set's size and
 * warnings are printed for mismatches (the label is still used).
 */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Start from a fabricated default label. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* For sets >= 2^32 sectors d_secperunit is clamped to
		 * UINT32_MAX, so only a ">" comparison is meaningful then. */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}

	}
}
2472 /*
2473 * Take care of things one might want to take care of in the event
2474 * that a disklabel isn't present.
2475 */
2476 static void
2477 raidmakedisklabel(struct raid_softc *rs)
2478 {
2479 struct disklabel *lp = rs->sc_dkdev.dk_label;
2480 db1_printf(("Making a label..\n"));
2481
2482 /*
2483 * For historical reasons, if there's no disklabel present
2484 * the raw partition must be marked FS_BSDFFS.
2485 */
2486
2487 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2488
2489 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2490
2491 lp->d_checksum = dkcksum(lp);
2492 }
2493 /*
2494 * Wait interruptibly for an exclusive lock.
2495 *
2496 * XXX
2497 * Several drivers do this; it should be abstracted and made MP-safe.
2498 * (Hmm... where have we seen this warning before :-> GO )
2499 */
2500 static int
2501 raidlock(struct raid_softc *rs)
2502 {
2503 int error;
2504
2505 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2506 rs->sc_flags |= RAIDF_WANTED;
2507 if ((error =
2508 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2509 return (error);
2510 }
2511 rs->sc_flags |= RAIDF_LOCKED;
2512 return (0);
2513 }
2514 /*
2515 * Unlock and wake up any waiters.
2516 */
2517 static void
2518 raidunlock(struct raid_softc *rs)
2519 {
2520
2521 rs->sc_flags &= ~RAIDF_LOCKED;
2522 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2523 rs->sc_flags &= ~RAIDF_WANTED;
2524 wakeup(rs);
2525 }
2526 }
2527
2528
2529 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2530 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2531 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2532
/*
 * rf_component_info_offset: byte offset of the component-info area on
 * each component.  Kept as a function (rather than using the constant
 * directly) so all users share a single definition.
 */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2539
2540 static daddr_t
2541 rf_component_info_size(unsigned secsize)
2542 {
2543 daddr_t info_size;
2544
2545 KASSERT(secsize);
2546 if (secsize > RF_COMPONENT_INFO_SIZE)
2547 info_size = secsize;
2548 else
2549 info_size = RF_COMPONENT_INFO_SIZE;
2550
2551 return info_size;
2552 }
2553
2554 static daddr_t
2555 rf_parity_map_offset(RF_Raid_t *raidPtr)
2556 {
2557 daddr_t map_offset;
2558
2559 KASSERT(raidPtr->bytesPerSector);
2560 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2561 map_offset = raidPtr->bytesPerSector;
2562 else
2563 map_offset = RF_COMPONENT_INFO_SIZE;
2564 map_offset += rf_component_info_offset();
2565
2566 return map_offset;
2567 }
2568
2569 static daddr_t
2570 rf_parity_map_size(RF_Raid_t *raidPtr)
2571 {
2572 daddr_t map_size;
2573
2574 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2575 map_size = raidPtr->bytesPerSector;
2576 else
2577 map_size = RF_PARITY_MAP_SIZE;
2578
2579 return map_size;
2580 }
2581
2582 int
2583 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2584 {
2585 RF_ComponentLabel_t *clabel;
2586
2587 clabel = raidget_component_label(raidPtr, col);
2588 clabel->clean = RF_RAID_CLEAN;
2589 raidflush_component_label(raidPtr, col);
2590 return(0);
2591 }
2592
2593
2594 int
2595 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2596 {
2597 RF_ComponentLabel_t *clabel;
2598
2599 clabel = raidget_component_label(raidPtr, col);
2600 clabel->clean = RF_RAID_DIRTY;
2601 raidflush_component_label(raidPtr, col);
2602 return(0);
2603 }
2604
/*
 * raidfetch_component_label: read column col's component label from disk
 * into the in-core copy (raid_cinfo[col].ci_label).  Returns the error
 * from raidread_component_label().
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2614
/*
 * raidget_component_label: return a pointer to column col's in-core
 * component label (no I/O; use raidflush_component_label() to persist
 * changes).
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2620
/*
 * raidflush_component_label: write column col's in-core component label
 * out to the component, after stamping it with the raid set's current
 * mod_counter (and, when parity maps are enabled, recording that
 * mod_counter as the parity-map mod count).  Returns the error from
 * raidwrite_component_label().
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2635
2636
/*
 * raidread_component_label: read a component label from the component
 * info area of the given device into *clabel.  Thin wrapper around
 * raidread_component_area() with the label's offset and size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2646
2647 /* ARGSUSED */
2648 static int
2649 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2650 size_t msize, daddr_t offset, daddr_t dsize)
2651 {
2652 struct buf *bp;
2653 const struct bdevsw *bdev;
2654 int error;
2655
2656 /* XXX should probably ensure that we don't try to do this if
2657 someone has changed rf_protected_sectors. */
2658
2659 if (b_vp == NULL) {
2660 /* For whatever reason, this component is not valid.
2661 Don't try to read a component label from it. */
2662 return(EINVAL);
2663 }
2664
2665 /* get a block of the appropriate size... */
2666 bp = geteblk((int)dsize);
2667 bp->b_dev = dev;
2668
2669 /* get our ducks in a row for the read */
2670 bp->b_blkno = offset / DEV_BSIZE;
2671 bp->b_bcount = dsize;
2672 bp->b_flags |= B_READ;
2673 bp->b_resid = dsize;
2674
2675 bdev = bdevsw_lookup(bp->b_dev);
2676 if (bdev == NULL)
2677 return (ENXIO);
2678 (*bdev->d_strategy)(bp);
2679
2680 error = biowait(bp);
2681
2682 if (!error) {
2683 memcpy(data, bp->b_data, msize);
2684 }
2685
2686 brelse(bp, 0);
2687 return(error);
2688 }
2689
2690
/*
 * raidwrite_component_label: write *clabel into the component info area
 * of the given device.  Thin wrapper around raidwrite_component_area()
 * with the label's offset and size; the write is synchronous (asyncp=0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2700
2701 /* ARGSUSED */
2702 static int
2703 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2704 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2705 {
2706 struct buf *bp;
2707 const struct bdevsw *bdev;
2708 int error;
2709
2710 /* get a block of the appropriate size... */
2711 bp = geteblk((int)dsize);
2712 bp->b_dev = dev;
2713
2714 /* get our ducks in a row for the write */
2715 bp->b_blkno = offset / DEV_BSIZE;
2716 bp->b_bcount = dsize;
2717 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2718 bp->b_resid = dsize;
2719
2720 memset(bp->b_data, 0, dsize);
2721 memcpy(bp->b_data, data, msize);
2722
2723 bdev = bdevsw_lookup(bp->b_dev);
2724 if (bdev == NULL)
2725 return (ENXIO);
2726 (*bdev->d_strategy)(bp);
2727 if (asyncp)
2728 return 0;
2729 error = biowait(bp);
2730 brelse(bp, 0);
2731 if (error) {
2732 #if 1
2733 printf("Failed to write RAID component info!\n");
2734 #endif
2735 }
2736
2737 return(error);
2738 }
2739
/*
 * rf_paritymap_kern_write: write the on-disk parity map *map to the
 * parity map area of every live component (dead disks are skipped).
 * Writes are synchronous.
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2757
/*
 * rf_paritymap_kern_read: read the parity map from every live component
 * and merge the copies into *map via rf_paritymap_merge(), so *map ends
 * up reflecting all regions any component considers dirty.  Dead disks
 * are skipped; read errors are not checked here.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* First live copy seeds the result... */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			/* ...and subsequent copies are merged in. */
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2782
/*
 * rf_markalldirty: bump the raid set's mod_counter and mark the
 * component labels of all non-failed components (and all in-use spares)
 * dirty on disk.  Spares being used get a freshly initialized label
 * noting which column they stand in for.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2842
2843
/*
 * rf_update_component_labels: bump the raid set's mod_counter and
 * rewrite the component labels of all optimal components and in-use
 * spares.  If `final' is RF_FINAL_COMPONENT_UPDATE and parity is known
 * good, the labels are additionally marked clean (the shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2918
2919 void
2920 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2921 {
2922
2923 if (vp != NULL) {
2924 if (auto_configured == 1) {
2925 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2926 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2927 vput(vp);
2928
2929 } else {
2930 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2931 }
2932 }
2933 }
2934
2935
2936 void
2937 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2938 {
2939 int r,c;
2940 struct vnode *vp;
2941 int acd;
2942
2943
2944 /* We take this opportunity to close the vnodes like we should.. */
2945
2946 for (c = 0; c < raidPtr->numCol; c++) {
2947 vp = raidPtr->raid_cinfo[c].ci_vp;
2948 acd = raidPtr->Disks[c].auto_configured;
2949 rf_close_component(raidPtr, vp, acd);
2950 raidPtr->raid_cinfo[c].ci_vp = NULL;
2951 raidPtr->Disks[c].auto_configured = 0;
2952 }
2953
2954 for (r = 0; r < raidPtr->numSpare; r++) {
2955 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2956 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2957 rf_close_component(raidPtr, vp, acd);
2958 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2959 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2960 }
2961 }
2962
2963
/*
 * rf_ReconThread: kernel-thread body that fails the component named in
 * *req (optionally kicking off reconstruction, per RF_FDFLAGS_RECON)
 * and then exits.  Frees *req before returning; recon_in_progress
 * brackets the work.  Never returns (kthread_exit).
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2985
/*
 * rf_RewriteParityThread: kernel-thread body that rewrites the set's
 * parity via rf_RewriteParity().  On success, parity_good is set to
 * RF_RAID_CLEAN (so the clean bit is written at shutdown); on failure
 * an error is logged.  Wakes anyone blocked on shutdown waiting for the
 * rewrite to finish.  Never returns (kthread_exit).
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3016
3017
/*
 * rf_CopybackThread: kernel-thread body that copies reconstructed data
 * back from the spare onto the replaced component, bracketed by
 * copyback_in_progress.  Never returns (kthread_exit).
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int     s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3032
3033
/*
 * rf_ReconstructInPlaceThread: kernel-thread body that reconstructs
 * column req->col in place (onto the same component), bracketed by
 * recon_in_progress.  Frees *req.  Never returns (kthread_exit).
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3051
/*
 * rf_get_component: probe one candidate device/partition for a RAIDframe
 * component label during autoconfiguration.
 *
 * If a reasonable label is found (and its recorded partition size fits
 * within `size'), a new RF_AutoConfig_t is prepended to ac_list and the
 * (open) vnode is kept.  Otherwise the vnode is closed and released.
 * On memory exhaustion the entire ac_list is torn down and NULL is
 * returned.  Returns the (possibly updated) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: free everything accumulated so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3109
/*
 * rf_find_raid_components: scan every disk-class device in the system
 * for RAIDframe component labels, for autoconfiguration.
 *
 * For each disk (skipping floppies, CDs, md(4) and the Atari floppy
 * drivers): wedges (dk) are probed when their partition type is
 * DKW_PTYPE_RAIDFRAME; otherwise every disklabel partition of type
 * FS_RAID is probed, and if none is found the raw partition itself is
 * probed, since components may be configured on raw disk devices.
 * Returns a linked list of RF_AutoConfig_t entries (NULL if none or on
 * allocation failure inside rf_get_component).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedges carry their partition type in the wedge
			   info rather than in a disklabel. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3291
3292
/*
 * rf_reasonable_label: sanity-check a component label read from disk.
 * Returns 1 (after scrubbing stale "Hi" fields via
 * rf_fix_old_label_size when numsecs is nonzero) if the version, clean
 * flag, geometry and block size all look plausible; 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3324
3325
3326 /*
3327 * For reasons yet unknown, some old component labels have garbage in
3328 * the newer numBlocksHi region, and this causes lossage. Since those
3329 * disks will also have numsecs set to less than 32 bits of sectors,
3330 * we can determine when this corruption has occurred, and fix it.
3331 *
3332 * The exact same problem, with the same unknown reason, happens to
3333 * the partitionSizeHi member as well.
3334 */
3335 static void
3336 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3337 {
3338
3339 if (numsecs < ((uint64_t)1 << 32)) {
3340 if (clabel->numBlocksHi) {
3341 printf("WARNING: total sectors < 32 bits, yet "
3342 "numBlocksHi set\n"
3343 "WARNING: resetting numBlocksHi to zero.\n");
3344 clabel->numBlocksHi = 0;
3345 }
3346
3347 if (clabel->partitionSizeHi) {
3348 printf("WARNING: total sectors < 32 bits, yet "
3349 "partitionSizeHi set\n"
3350 "WARNING: resetting partitionSizeHi to zero.\n");
3351 clabel->partitionSizeHi = 0;
3352 }
3353 }
3354 }
3355
3356
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.
 * DEBUG kernels only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Printable names for root_partition; index masked to 0..3 below. */
	static const char *rp[] = {
		"No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3390
3391 RF_ConfigSet_t *
3392 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3393 {
3394 RF_AutoConfig_t *ac;
3395 RF_ConfigSet_t *config_sets;
3396 RF_ConfigSet_t *cset;
3397 RF_AutoConfig_t *ac_next;
3398
3399
3400 config_sets = NULL;
3401
3402 /* Go through the AutoConfig list, and figure out which components
3403 belong to what sets. */
3404 ac = ac_list;
3405 while(ac!=NULL) {
3406 /* we're going to putz with ac->next, so save it here
3407 for use at the end of the loop */
3408 ac_next = ac->next;
3409
3410 if (config_sets == NULL) {
3411 /* will need at least this one... */
3412 config_sets = (RF_ConfigSet_t *)
3413 malloc(sizeof(RF_ConfigSet_t),
3414 M_RAIDFRAME, M_NOWAIT);
3415 if (config_sets == NULL) {
3416 panic("rf_create_auto_sets: No memory!");
3417 }
3418 /* this one is easy :) */
3419 config_sets->ac = ac;
3420 config_sets->next = NULL;
3421 config_sets->rootable = 0;
3422 ac->next = NULL;
3423 } else {
3424 /* which set does this component fit into? */
3425 cset = config_sets;
3426 while(cset!=NULL) {
3427 if (rf_does_it_fit(cset, ac)) {
3428 /* looks like it matches... */
3429 ac->next = cset->ac;
3430 cset->ac = ac;
3431 break;
3432 }
3433 cset = cset->next;
3434 }
3435 if (cset==NULL) {
3436 /* didn't find a match above... new set..*/
3437 cset = (RF_ConfigSet_t *)
3438 malloc(sizeof(RF_ConfigSet_t),
3439 M_RAIDFRAME, M_NOWAIT);
3440 if (cset == NULL) {
3441 panic("rf_create_auto_sets: No memory!");
3442 }
3443 cset->ac = ac;
3444 ac->next = NULL;
3445 cset->next = config_sets;
3446 cset->rootable = 0;
3447 config_sets = cset;
3448 }
3449 }
3450 ac = ac_next;
3451 }
3452
3453
3454 return(config_sets);
3455 }
3456
3457 static int
3458 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3459 {
3460 RF_ComponentLabel_t *clabel1, *clabel2;
3461
3462 /* If this one matches the *first* one in the set, that's good
3463 enough, since the other members of the set would have been
3464 through here too... */
3465 /* note that we are not checking partitionSize here..
3466
3467 Note that we are also not checking the mod_counters here.
3468 If everything else matches except the mod_counter, that's
3469 good enough for this test. We will deal with the mod_counters
3470 a little later in the autoconfiguration process.
3471
3472 (clabel1->mod_counter == clabel2->mod_counter) &&
3473
3474 The reason we don't check for this is that failed disks
3475 will have lower modification counts. If those disks are
3476 not added to the set they used to belong to, then they will
3477 form their own set, which may result in 2 different sets,
3478 for example, competing to be configured at raid0, and
3479 perhaps competing to be the root filesystem set. If the
3480 wrong ones get configured, or both attempt to become /,
3481 weird behaviour and or serious lossage will occur. Thus we
3482 need to bring them into the fold here, and kick them out at
3483 a later point.
3484
3485 */
3486
3487 clabel1 = cset->ac->clabel;
3488 clabel2 = ac->clabel;
3489 if ((clabel1->version == clabel2->version) &&
3490 (clabel1->serial_number == clabel2->serial_number) &&
3491 (clabel1->num_rows == clabel2->num_rows) &&
3492 (clabel1->num_columns == clabel2->num_columns) &&
3493 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3494 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3495 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3496 (clabel1->parityConfig == clabel2->parityConfig) &&
3497 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3498 (clabel1->blockSize == clabel2->blockSize) &&
3499 rf_component_label_numblocks(clabel1) ==
3500 rf_component_label_numblocks(clabel2) &&
3501 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3502 (clabel1->root_partition == clabel2->root_partition) &&
3503 (clabel1->last_unit == clabel2->last_unit) &&
3504 (clabel1->config_order == clabel2->config_order)) {
3505 /* if it get's here, it almost *has* to be a match */
3506 } else {
3507 /* it's not consistent with somebody in the set..
3508 punt */
3509 return(0);
3510 }
3511 /* all was fine.. it must fit... */
3512 return(1);
3513 }
3514
/*
 * Decide whether configuration set 'cset' has enough live components
 * to be brought up.  Returns 1 if the set is configurable, 0 if too
 * many members are missing or stale.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The highest counter present wins: failed components carry
	   lower mod_counters and are treated as missing below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component carrying the winning
	   mod_counter; a column with none counts as missing. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror pair
			   without bailing, so this pair has at least
			   one live member.. reset the even_pair_failed
			   flag, and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Tolerated failures: RAID0 none, RAID4/RAID5 at most one.
	   (RAID1 pairs were fully handled in the loop above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3617
3618 void
3619 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3620 RF_Raid_t *raidPtr)
3621 {
3622 RF_ComponentLabel_t *clabel;
3623 int i;
3624
3625 clabel = ac->clabel;
3626
3627 /* 1. Fill in the common stuff */
3628 config->numRow = clabel->num_rows = 1;
3629 config->numCol = clabel->num_columns;
3630 config->numSpare = 0; /* XXX should this be set here? */
3631 config->sectPerSU = clabel->sectPerSU;
3632 config->SUsPerPU = clabel->SUsPerPU;
3633 config->SUsPerRU = clabel->SUsPerRU;
3634 config->parityConfig = clabel->parityConfig;
3635 /* XXX... */
3636 strcpy(config->diskQueueType,"fifo");
3637 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3638 config->layoutSpecificSize = 0; /* XXX ?? */
3639
3640 while(ac!=NULL) {
3641 /* row/col values will be in range due to the checks
3642 in reasonable_label() */
3643 strcpy(config->devnames[0][ac->clabel->column],
3644 ac->devname);
3645 ac = ac->next;
3646 }
3647
3648 for(i=0;i<RF_MAXDBGV;i++) {
3649 config->debugVars[i][0] = 0;
3650 }
3651 }
3652
3653 int
3654 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3655 {
3656 RF_ComponentLabel_t *clabel;
3657 int column;
3658 int sparecol;
3659
3660 raidPtr->autoconfigure = new_value;
3661
3662 for(column=0; column<raidPtr->numCol; column++) {
3663 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3664 clabel = raidget_component_label(raidPtr, column);
3665 clabel->autoconfigure = new_value;
3666 raidflush_component_label(raidPtr, column);
3667 }
3668 }
3669 for(column = 0; column < raidPtr->numSpare ; column++) {
3670 sparecol = raidPtr->numCol + column;
3671 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3672 clabel = raidget_component_label(raidPtr, sparecol);
3673 clabel->autoconfigure = new_value;
3674 raidflush_component_label(raidPtr, sparecol);
3675 }
3676 }
3677 return(new_value);
3678 }
3679
3680 int
3681 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3682 {
3683 RF_ComponentLabel_t *clabel;
3684 int column;
3685 int sparecol;
3686
3687 raidPtr->root_partition = new_value;
3688 for(column=0; column<raidPtr->numCol; column++) {
3689 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3690 clabel = raidget_component_label(raidPtr, column);
3691 clabel->root_partition = new_value;
3692 raidflush_component_label(raidPtr, column);
3693 }
3694 }
3695 for(column = 0; column < raidPtr->numSpare ; column++) {
3696 sparecol = raidPtr->numCol + column;
3697 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3698 clabel = raidget_component_label(raidPtr, sparecol);
3699 clabel->root_partition = new_value;
3700 raidflush_component_label(raidPtr, sparecol);
3701 }
3702 }
3703 return(new_value);
3704 }
3705
3706 void
3707 rf_release_all_vps(RF_ConfigSet_t *cset)
3708 {
3709 RF_AutoConfig_t *ac;
3710
3711 ac = cset->ac;
3712 while(ac!=NULL) {
3713 /* Close the vp, and give it back */
3714 if (ac->vp) {
3715 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3716 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3717 vput(ac->vp);
3718 ac->vp = NULL;
3719 }
3720 ac = ac->next;
3721 }
3722 }
3723
3724
3725 void
3726 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3727 {
3728 RF_AutoConfig_t *ac;
3729 RF_AutoConfig_t *next_ac;
3730
3731 ac = cset->ac;
3732 while(ac!=NULL) {
3733 next_ac = ac->next;
3734 /* nuke the label */
3735 free(ac->clabel, M_RAIDFRAME);
3736 /* cleanup the config structure */
3737 free(ac, M_RAIDFRAME);
3738 /* "next.." */
3739 ac = next_ac;
3740 }
3741 /* and, finally, nuke the config set */
3742 free(cset, M_RAIDFRAME);
3743 }
3744
3745
/*
 * Initialize a component label from the current in-core state of the
 * RAID set.  Per-component fields (e.g. row/column) are not set here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* stores the size through the low/high word pair in the label */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* let the parity map code fill in its own label fields */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3778
/*
 * Autoconfigure one configuration set: build an RF_Config_t from its
 * component labels, find a free raid unit (preferring the unit the set
 * was last configured on), and configure it.  Returns the softc of the
 * configured unit, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk upward until we find an unconfigured unit.
	   NOTE(review): assumes raidget() never returns NULL here --
	   confirm against its definition. */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		/* configuration worked: bring up the disk side and mark
		   all components dirty until the first clean shutdown */
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3852
3853 void
3854 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3855 {
3856 struct buf *bp;
3857 struct raid_softc *rs;
3858
3859 bp = (struct buf *)desc->bp;
3860 rs = desc->raidPtr->softc;
3861 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3862 (bp->b_flags & B_READ));
3863 }
3864
/*
 * Initialize a pool(9) at IPL_BIO, pre-allocate xmin items, and set
 * the low (xmin) and high (xmax) watermarks.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3874
3875 /*
3876 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3877 * if there is IO pending and if that IO could possibly be done for a
3878 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3879 * otherwise.
3880 *
3881 */
3882
3883 int
3884 rf_buf_queue_check(RF_Raid_t *raidPtr)
3885 {
3886 struct raid_softc *rs = raidPtr->softc;
3887 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3888 /* there is work to do */
3889 return 0;
3890 }
3891 /* default is nothing to do */
3892 return 1;
3893 }
3894
3895 int
3896 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3897 {
3898 uint64_t numsecs;
3899 unsigned secsize;
3900 int error;
3901
3902 error = getdisksize(vp, &numsecs, &secsize);
3903 if (error == 0) {
3904 diskPtr->blockSize = secsize;
3905 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3906 diskPtr->partitionSize = numsecs;
3907 return 0;
3908 }
3909 return error;
3910 }
3911
/* autoconf match function: a raid pseudo-device always matches. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3917
/* autoconf attach function: nothing to do at attach time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3923
3924
3925 static int
3926 raid_detach(device_t self, int flags)
3927 {
3928 int error;
3929 struct raid_softc *rs = raidget(device_unit(self));
3930
3931 if (rs == NULL)
3932 return ENXIO;
3933
3934 if ((error = raidlock(rs)) != 0)
3935 return (error);
3936
3937 error = raid_detach_unlocked(rs);
3938
3939 raidunlock(rs);
3940
3941 /* XXXkd: raidput(rs) ??? */
3942
3943 return error;
3944 }
3945
/*
 * Synthesize a disk geometry for the raid unit from the RAID
 * parameters and publish it via disk_set_info().
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* nominal value: a RAID set has no physical tracks; the 4x
	   factor appears to be an arbitrary convention -- confirm */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3960
3961 /*
3962 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3963 * We end up returning whatever error was returned by the first cache flush
3964 * that fails.
3965 */
3966
3967 int
3968 rf_sync_component_caches(RF_Raid_t *raidPtr)
3969 {
3970 int c, sparecol;
3971 int e,error;
3972 int force = 1;
3973
3974 error = 0;
3975 for (c = 0; c < raidPtr->numCol; c++) {
3976 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3977 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3978 &force, FWRITE, NOCRED);
3979 if (e) {
3980 if (e != ENODEV)
3981 printf("raid%d: cache flush to component %s failed.\n",
3982 raidPtr->raidid, raidPtr->Disks[c].devname);
3983 if (error == 0) {
3984 error = e;
3985 }
3986 }
3987 }
3988 }
3989
3990 for( c = 0; c < raidPtr->numSpare ; c++) {
3991 sparecol = raidPtr->numCol + c;
3992 /* Need to ensure that the reconstruct actually completed! */
3993 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3994 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3995 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3996 if (e) {
3997 if (e != ENODEV)
3998 printf("raid%d: cache flush to component %s failed.\n",
3999 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
4000 if (error == 0) {
4001 error = e;
4002 }
4003 }
4004 }
4005 }
4006 return error;
4007 }
4008