rf_netbsdkintf.c revision 1.318 1 /* $NetBSD: rf_netbsdkintf.c,v 1.318 2014/12/31 08:24:50 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.318 2014/12/31 08:24:50 mlelstv Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206
207 const struct bdevsw raid_bdevsw = {
208 .d_open = raidopen,
209 .d_close = raidclose,
210 .d_strategy = raidstrategy,
211 .d_ioctl = raidioctl,
212 .d_dump = raiddump,
213 .d_psize = raidsize,
214 .d_discard = nodiscard,
215 .d_flag = D_DISK
216 };
217
218 const struct cdevsw raid_cdevsw = {
219 .d_open = raidopen,
220 .d_close = raidclose,
221 .d_read = raidread,
222 .d_write = raidwrite,
223 .d_ioctl = raidioctl,
224 .d_stop = nostop,
225 .d_tty = notty,
226 .d_poll = nopoll,
227 .d_mmap = nommap,
228 .d_kqfilter = nokqfilter,
229 .d_discard = nodiscard,
230 .d_flag = D_DISK
231 };
232
233 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
234
/*
 * Per-unit software state for a RAIDframe device.  One of these exists
 * for each configured (or being-configured) raid unit, linked into the
 * global `raids' list under raid_lock.
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf device handle */
	int     sc_unit;		/* raid unit number (raidN) */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* the RAIDframe state proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global `raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
256
257 extern struct cfdriver raid_cd;
258 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260 DVF_DETACH_SHUTDOWN);
261
262 /*
263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
264 * Be aware that large numbers can allow the driver to consume a lot of
265 * kernel memory, especially on writes, and in degraded mode reads.
266 *
267 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
268 * a single 64K write will typically require 64K for the old data,
269 * 64K for the old parity, and 64K for the new parity, for a total
270 * of 192K (if the parity buffer is not re-used immediately).
271 * Even it if is used immediately, that's still 128K, which when multiplied
272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
273 *
274 * Now in degraded mode, for example, a 64K read on the above setup may
275 * require data reconstruction, which will require *all* of the 4 remaining
276 * disks to participate -- 4 * 32K/disk == 128K again.
277 */
278
279 #ifndef RAIDOUTSTANDING
280 #define RAIDOUTSTANDING 6
281 #endif
282
283 #define RAIDLABELDEV(dev) \
284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
285
286 /* declared here, and made public, for the benefit of KVM stuff.. */
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static int raid_detach_unlocked(struct raid_softc *);
297
298 static void rf_markalldirty(RF_Raid_t *);
299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
300
301 void rf_ReconThread(struct rf_recon_req *);
302 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303 void rf_CopybackThread(RF_Raid_t *raidPtr);
304 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
305 int rf_autoconfig(device_t);
306 void rf_buildroothack(RF_ConfigSet_t *);
307
308 RF_AutoConfig_t *rf_find_raid_components(void);
309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313 int rf_set_autoconfig(RF_Raid_t *, int);
314 int rf_set_rootpartition(RF_Raid_t *, int);
315 void rf_release_all_vps(RF_ConfigSet_t *);
316 void rf_cleanup_config_set(RF_ConfigSet_t *);
317 int rf_have_enough_components(RF_ConfigSet_t *);
318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
320
321 /*
322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323 * Note that this is overridden by having RAID_AUTOCONFIG as an option
324 * in the kernel config file.
325 */
326 #ifdef RAID_AUTOCONFIG
327 int raidautoconfig = 1;
328 #else
329 int raidautoconfig = 0;
330 #endif
331 static bool raidautoconfigdone = false;
332
333 struct RF_Pools_s rf_pools;
334
335 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336 static kmutex_t raid_lock;
337
338 static struct raid_softc *
339 raidcreate(int unit) {
340 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341 if (sc == NULL) {
342 #ifdef DIAGNOSTIC
343 printf("%s: out of memory\n", __func__);
344 #endif
345 return NULL;
346 }
347 sc->sc_unit = unit;
348 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
349 return sc;
350 }
351
352 static void
353 raiddestroy(struct raid_softc *sc) {
354 bufq_free(sc->buf_queue);
355 kmem_free(sc, sizeof(*sc));
356 }
357
358 static struct raid_softc *
359 raidget(int unit) {
360 struct raid_softc *sc;
361 if (unit < 0) {
362 #ifdef DIAGNOSTIC
363 panic("%s: unit %d!", __func__, unit);
364 #endif
365 return NULL;
366 }
367 mutex_enter(&raid_lock);
368 LIST_FOREACH(sc, &raids, sc_link) {
369 if (sc->sc_unit == unit) {
370 mutex_exit(&raid_lock);
371 return sc;
372 }
373 }
374 mutex_exit(&raid_lock);
375 if ((sc = raidcreate(unit)) == NULL)
376 return NULL;
377 mutex_enter(&raid_lock);
378 LIST_INSERT_HEAD(&raids, sc, sc_link);
379 mutex_exit(&raid_lock);
380 return sc;
381 }
382
383 static void
384 raidput(struct raid_softc *sc) {
385 mutex_enter(&raid_lock);
386 LIST_REMOVE(sc, sc_link);
387 mutex_exit(&raid_lock);
388 raiddestroy(sc);
389 }
390
/*
 * Driver attach: one-time RAIDframe initialization.  Called with the
 * number of units to support (unused here); sets up global locks,
 * boots the RAIDframe core, attaches the autoconf glue, and registers
 * a config finalizer so autoconfiguration of RAID sets happens after
 * all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table handshake primitives (parity declustering only). */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
423
424 int
425 rf_autoconfig(device_t self)
426 {
427 RF_AutoConfig_t *ac_list;
428 RF_ConfigSet_t *config_sets;
429
430 if (!raidautoconfig || raidautoconfigdone == true)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfigdone = true;
435
436 #ifdef __HAVE_CPU_BOOTCONF
437 /*
438 * 0. find the boot device if needed first so we can use it later
439 * this needs to be done before we autoconfigure any raid sets,
440 * because if we use wedges we are not going to be able to open
441 * the boot device later
442 */
443 if (booted_device == NULL)
444 cpu_bootconf();
445 #endif
446 /* 1. locate all RAID components on the system */
447 aprint_debug("Searching for RAID components...\n");
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set and configure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return 1;
460 }
461
462 static int
463 rf_containsboot(RF_Raid_t *r, device_t bdv) {
464 const char *bootname = device_xname(bdv);
465 size_t len = strlen(bootname);
466
467 for (int col = 0; col < r->numCol; col++) {
468 const char *devname = r->Disks[col].devname;
469 devname += sizeof("/dev/") - 1;
470 if (strncmp(devname, "dk", 2) == 0) {
471 const char *parent =
472 dkwedge_get_parent_name(r->Disks[col].dev);
473 if (parent != NULL)
474 devname = parent;
475 }
476 if (strncmp(devname, bootname, len) == 0) {
477 struct raid_softc *sc = r->softc;
478 aprint_debug("raid%d includes boot device %s\n",
479 sc->sc_unit, devname);
480 return 1;
481 }
482 }
483 return 0;
484 }
485
/*
 * Configure every eligible configuration set and, if appropriate,
 * override booted_device so the system can root on a RAID set.
 * Consumes (frees) the config_sets list.
 *
 * Root selection: if exactly one configured set is rootable, it wins
 * (subject to the checks below); if several are, prefer the one that
 * contains the MD-determined boot device; otherwise ask the user.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of rootable configured sets */
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Take over root if nothing else was booted, if the
		 * component labels force root here, or if the set
		 * contains the boot device anyway.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count: only sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
596
597
598 int
599 raidsize(dev_t dev)
600 {
601 struct raid_softc *rs;
602 struct disklabel *lp;
603 int part, unit, omask, size;
604
605 unit = raidunit(dev);
606 if ((rs = raidget(unit)) == NULL)
607 return -1;
608 if ((rs->sc_flags & RAIDF_INITED) == 0)
609 return (-1);
610
611 part = DISKPART(dev);
612 omask = rs->sc_dkdev.dk_openmask & (1 << part);
613 lp = rs->sc_dkdev.dk_label;
614
615 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
616 return (-1);
617
618 if (lp->d_partitions[part].p_fstype != FS_SWAP)
619 size = -1;
620 else
621 size = lp->d_partitions[part].p_size *
622 (lp->d_secsize / DEV_BSIZE);
623
624 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
625 return (-1);
626
627 return (size);
628
629 }
630
631 int
632 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
633 {
634 int unit = raidunit(dev);
635 struct raid_softc *rs;
636 const struct bdevsw *bdev;
637 struct disklabel *lp;
638 RF_Raid_t *raidPtr;
639 daddr_t offset;
640 int part, c, sparecol, j, scol, dumpto;
641 int error = 0;
642
643 if ((rs = raidget(unit)) == NULL)
644 return ENXIO;
645
646 raidPtr = &rs->sc_r;
647
648 if ((rs->sc_flags & RAIDF_INITED) == 0)
649 return ENXIO;
650
651 /* we only support dumping to RAID 1 sets */
652 if (raidPtr->Layout.numDataCol != 1 ||
653 raidPtr->Layout.numParityCol != 1)
654 return EINVAL;
655
656
657 if ((error = raidlock(rs)) != 0)
658 return error;
659
660 if (size % DEV_BSIZE != 0) {
661 error = EINVAL;
662 goto out;
663 }
664
665 if (blkno + size / DEV_BSIZE > rs->sc_size) {
666 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
667 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
668 size / DEV_BSIZE, rs->sc_size);
669 error = EINVAL;
670 goto out;
671 }
672
673 part = DISKPART(dev);
674 lp = rs->sc_dkdev.dk_label;
675 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
676
677 /* figure out what device is alive.. */
678
679 /*
680 Look for a component to dump to. The preference for the
681 component to dump to is as follows:
682 1) the master
683 2) a used_spare of the master
684 3) the slave
685 4) a used_spare of the slave
686 */
687
688 dumpto = -1;
689 for (c = 0; c < raidPtr->numCol; c++) {
690 if (raidPtr->Disks[c].status == rf_ds_optimal) {
691 /* this might be the one */
692 dumpto = c;
693 break;
694 }
695 }
696
697 /*
698 At this point we have possibly selected a live master or a
699 live slave. We now check to see if there is a spared
700 master (or a spared slave), if we didn't find a live master
701 or a live slave.
702 */
703
704 for (c = 0; c < raidPtr->numSpare; c++) {
705 sparecol = raidPtr->numCol + c;
706 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
707 /* How about this one? */
708 scol = -1;
709 for(j=0;j<raidPtr->numCol;j++) {
710 if (raidPtr->Disks[j].spareCol == sparecol) {
711 scol = j;
712 break;
713 }
714 }
715 if (scol == 0) {
716 /*
717 We must have found a spared master!
718 We'll take that over anything else
719 found so far. (We couldn't have
720 found a real master before, since
721 this is a used spare, and it's
722 saying that it's replacing the
723 master.) On reboot (with
724 autoconfiguration turned on)
725 sparecol will become the 1st
726 component (component0) of this set.
727 */
728 dumpto = sparecol;
729 break;
730 } else if (scol != -1) {
731 /*
732 Must be a spared slave. We'll dump
733 to that if we havn't found anything
734 else so far.
735 */
736 if (dumpto == -1)
737 dumpto = sparecol;
738 }
739 }
740 }
741
742 if (dumpto == -1) {
743 /* we couldn't find any live components to dump to!?!?
744 */
745 error = EINVAL;
746 goto out;
747 }
748
749 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
750
751 /*
752 Note that blkno is relative to this particular partition.
753 By adding the offset of this partition in the RAID
754 set, and also adding RF_PROTECTED_SECTORS, we get a
755 value that is relative to the partition used for the
756 underlying component.
757 */
758
759 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
760 blkno + offset, va, size);
761
762 out:
763 raidunlock(rs);
764
765 return error;
766 }
767 /* ARGSUSED */
/*
 * Open a partition of a raid unit.  Maintains the char/block open
 * masks under the per-unit lock; on the first open of an initialized
 * unit, marks all components dirty so an unclean shutdown is
 * detectable.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* On first open (no wedges), (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
852 /* ARGSUSED */
853 int
854 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
855 {
856 int unit = raidunit(dev);
857 struct raid_softc *rs;
858 int error = 0;
859 int part;
860
861 if ((rs = raidget(unit)) == NULL)
862 return ENXIO;
863
864 if ((error = raidlock(rs)) != 0)
865 return (error);
866
867 part = DISKPART(dev);
868
869 /* ...that much closer to allowing unconfiguration... */
870 switch (fmt) {
871 case S_IFCHR:
872 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
873 break;
874
875 case S_IFBLK:
876 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
877 break;
878 }
879 rs->sc_dkdev.dk_openmask =
880 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
881
882 if ((rs->sc_dkdev.dk_openmask == 0) &&
883 ((rs->sc_flags & RAIDF_INITED) != 0)) {
884 /* Last one... device is not unconfigured yet.
885 Device shutdown has taken care of setting the
886 clean bits if RAIDF_INITED is not set
887 mark things as clean... */
888
889 rf_update_component_labels(&rs->sc_r,
890 RF_FINAL_COMPONENT_UPDATE);
891
892 /* If the kernel is shutting down, it will detach
893 * this RAID set soon enough.
894 */
895 }
896
897 raidunlock(rs);
898 return (0);
899
900 }
901
/*
 * Block-I/O strategy routine.  Validates the request, bounds-checks it
 * against the media size (raw partition) or the disklabel (other
 * partitions), then queues it for the raid I/O thread and wakes that
 * thread via iodone_cv.  On any rejection the buffer is completed
 * immediately with b_resid == b_bcount.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfers succeed trivially. */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* Convert totalSectors to DEV_BSIZE units either way. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
972 /* ARGSUSED */
973 int
974 raidread(dev_t dev, struct uio *uio, int flags)
975 {
976 int unit = raidunit(dev);
977 struct raid_softc *rs;
978
979 if ((rs = raidget(unit)) == NULL)
980 return ENXIO;
981
982 if ((rs->sc_flags & RAIDF_INITED) == 0)
983 return (ENXIO);
984
985 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
986
987 }
988 /* ARGSUSED */
989 int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 int unit = raidunit(dev);
993 struct raid_softc *rs;
994
995 if ((rs = raidget(unit)) == NULL)
996 return ENXIO;
997
998 if ((rs->sc_flags & RAIDF_INITED) == 0)
999 return (ENXIO);
1000
1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002
1003 }
1004
/*
 * Detach helper, called with the unit lock held.  Refuses to detach
 * while any partition is open; otherwise shuts down the RAIDframe
 * state (if initialized) and detaches/destroys the disk structures.
 * Returns 0 on success, EBUSY if open, or the rf_Shutdown() error.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		/* Shutdown succeeded: clear the state flags. */
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1036
1037 int
1038 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1039 {
1040 int unit = raidunit(dev);
1041 int error = 0;
1042 int part, pmask, s;
1043 cfdata_t cf;
1044 struct raid_softc *rs;
1045 RF_Config_t *k_cfg, *u_cfg;
1046 RF_Raid_t *raidPtr;
1047 RF_RaidDisk_t *diskPtr;
1048 RF_AccTotals_t *totals;
1049 RF_DeviceConfig_t *d_cfg, **ucfgp;
1050 u_char *specific_buf;
1051 int retcode = 0;
1052 int column;
1053 /* int raidid; */
1054 struct rf_recon_req *rrcopy, *rr;
1055 RF_ComponentLabel_t *clabel;
1056 RF_ComponentLabel_t *ci_label;
1057 RF_ComponentLabel_t **clabel_ptr;
1058 RF_SingleComponent_t *sparePtr,*componentPtr;
1059 RF_SingleComponent_t component;
1060 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1061 int i, j, d;
1062 #ifdef __HAVE_OLD_DISKLABEL
1063 struct disklabel newlabel;
1064 #endif
1065 struct dkwedge_info *dkw;
1066
1067 if ((rs = raidget(unit)) == NULL)
1068 return ENXIO;
1069 raidPtr = &rs->sc_r;
1070
1071 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1072 (int) DISKPART(dev), (int) unit, cmd));
1073
1074 /* Must be open for writes for these commands... */
1075 switch (cmd) {
1076 #ifdef DIOCGSECTORSIZE
1077 case DIOCGSECTORSIZE:
1078 *(u_int *)data = raidPtr->bytesPerSector;
1079 return 0;
1080 case DIOCGMEDIASIZE:
1081 *(off_t *)data =
1082 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1083 return 0;
1084 #endif
1085 case DIOCSDINFO:
1086 case DIOCWDINFO:
1087 #ifdef __HAVE_OLD_DISKLABEL
1088 case ODIOCWDINFO:
1089 case ODIOCSDINFO:
1090 #endif
1091 case DIOCWLABEL:
1092 case DIOCAWEDGE:
1093 case DIOCDWEDGE:
1094 case DIOCMWEDGES:
1095 case DIOCSSTRATEGY:
1096 if ((flag & FWRITE) == 0)
1097 return (EBADF);
1098 }
1099
1100 /* Must be initialized for these... */
1101 switch (cmd) {
1102 case DIOCGDINFO:
1103 case DIOCSDINFO:
1104 case DIOCWDINFO:
1105 #ifdef __HAVE_OLD_DISKLABEL
1106 case ODIOCGDINFO:
1107 case ODIOCWDINFO:
1108 case ODIOCSDINFO:
1109 case ODIOCGDEFLABEL:
1110 #endif
1111 case DIOCGPART:
1112 case DIOCWLABEL:
1113 case DIOCGDEFLABEL:
1114 case DIOCAWEDGE:
1115 case DIOCDWEDGE:
1116 case DIOCLWEDGES:
1117 case DIOCMWEDGES:
1118 case DIOCCACHESYNC:
1119 case RAIDFRAME_SHUTDOWN:
1120 case RAIDFRAME_REWRITEPARITY:
1121 case RAIDFRAME_GET_INFO:
1122 case RAIDFRAME_RESET_ACCTOTALS:
1123 case RAIDFRAME_GET_ACCTOTALS:
1124 case RAIDFRAME_KEEP_ACCTOTALS:
1125 case RAIDFRAME_GET_SIZE:
1126 case RAIDFRAME_FAIL_DISK:
1127 case RAIDFRAME_COPYBACK:
1128 case RAIDFRAME_CHECK_RECON_STATUS:
1129 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1130 case RAIDFRAME_GET_COMPONENT_LABEL:
1131 case RAIDFRAME_SET_COMPONENT_LABEL:
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 case RAIDFRAME_REMOVE_HOT_SPARE:
1134 case RAIDFRAME_INIT_LABELS:
1135 case RAIDFRAME_REBUILD_IN_PLACE:
1136 case RAIDFRAME_CHECK_PARITY:
1137 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1138 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1139 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1140 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1141 case RAIDFRAME_SET_AUTOCONFIG:
1142 case RAIDFRAME_SET_ROOT:
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1145 case RAIDFRAME_PARITYMAP_STATUS:
1146 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1147 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1148 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1149 case DIOCGSTRATEGY:
1150 case DIOCSSTRATEGY:
1151 if ((rs->sc_flags & RAIDF_INITED) == 0)
1152 return (ENXIO);
1153 }
1154
1155 switch (cmd) {
1156 #ifdef COMPAT_50
1157 case RAIDFRAME_GET_INFO50:
1158 return rf_get_info50(raidPtr, data);
1159
1160 case RAIDFRAME_CONFIGURE50:
1161 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1162 return retcode;
1163 goto config;
1164 #endif
1165 /* configure the system */
1166 case RAIDFRAME_CONFIGURE:
1167
1168 if (raidPtr->valid) {
1169 /* There is a valid RAID set running on this unit! */
1170 printf("raid%d: Device already configured!\n",unit);
1171 return(EINVAL);
1172 }
1173
1174 /* copy-in the configuration information */
1175 /* data points to a pointer to the configuration structure */
1176
1177 u_cfg = *((RF_Config_t **) data);
1178 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1179 if (k_cfg == NULL) {
1180 return (ENOMEM);
1181 }
1182 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1183 if (retcode) {
1184 RF_Free(k_cfg, sizeof(RF_Config_t));
1185 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1186 retcode));
1187 return (retcode);
1188 }
1189 goto config;
1190 config:
1191 /* allocate a buffer for the layout-specific data, and copy it
1192 * in */
1193 if (k_cfg->layoutSpecificSize) {
1194 if (k_cfg->layoutSpecificSize > 10000) {
1195 /* sanity check */
1196 RF_Free(k_cfg, sizeof(RF_Config_t));
1197 return (EINVAL);
1198 }
1199 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1200 (u_char *));
1201 if (specific_buf == NULL) {
1202 RF_Free(k_cfg, sizeof(RF_Config_t));
1203 return (ENOMEM);
1204 }
1205 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1206 k_cfg->layoutSpecificSize);
1207 if (retcode) {
1208 RF_Free(k_cfg, sizeof(RF_Config_t));
1209 RF_Free(specific_buf,
1210 k_cfg->layoutSpecificSize);
1211 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1212 retcode));
1213 return (retcode);
1214 }
1215 } else
1216 specific_buf = NULL;
1217 k_cfg->layoutSpecific = specific_buf;
1218
1219 /* should do some kind of sanity check on the configuration.
1220 * Store the sum of all the bytes in the last byte? */
1221
1222 /* configure the system */
1223
1224 /*
1225 * Clear the entire RAID descriptor, just to make sure
1226 * there is no stale data left in the case of a
1227 * reconfiguration
1228 */
1229 memset(raidPtr, 0, sizeof(*raidPtr));
1230 raidPtr->softc = rs;
1231 raidPtr->raidid = unit;
1232
1233 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1234
1235 if (retcode == 0) {
1236
1237 /* allow this many simultaneous IO's to
1238 this RAID device */
1239 raidPtr->openings = RAIDOUTSTANDING;
1240
1241 raidinit(rs);
1242 rf_markalldirty(raidPtr);
1243 }
1244 /* free the buffers. No return code here. */
1245 if (k_cfg->layoutSpecificSize) {
1246 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1247 }
1248 RF_Free(k_cfg, sizeof(RF_Config_t));
1249
1250 return (retcode);
1251
1252 /* shutdown the system */
1253 case RAIDFRAME_SHUTDOWN:
1254
1255 part = DISKPART(dev);
1256 pmask = (1 << part);
1257
1258 if ((error = raidlock(rs)) != 0)
1259 return (error);
1260
1261 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1262 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1263 (rs->sc_dkdev.dk_copenmask & pmask)))
1264 retcode = EBUSY;
1265 else {
1266 rs->sc_flags |= RAIDF_SHUTDOWN;
1267 rs->sc_dkdev.dk_copenmask &= ~pmask;
1268 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1269 rs->sc_dkdev.dk_openmask &= ~pmask;
1270 retcode = 0;
1271 }
1272
1273 raidunlock(rs);
1274
1275 if (retcode != 0)
1276 return retcode;
1277
1278 /* free the pseudo device attach bits */
1279
1280 cf = device_cfdata(rs->sc_dev);
1281 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1282 free(cf, M_RAIDFRAME);
1283
1284 return (retcode);
1285 case RAIDFRAME_GET_COMPONENT_LABEL:
1286 clabel_ptr = (RF_ComponentLabel_t **) data;
1287 /* need to read the component label for the disk indicated
1288 by row,column in clabel */
1289
1290 /*
1291 * Perhaps there should be an option to skip the in-core
1292 * copy and hit the disk, as with disklabel(8).
1293 */
1294 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1295
1296 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1297
1298 if (retcode) {
1299 RF_Free(clabel, sizeof(*clabel));
1300 return retcode;
1301 }
1302
1303 clabel->row = 0; /* Don't allow looking at anything else.*/
1304
1305 column = clabel->column;
1306
1307 if ((column < 0) || (column >= raidPtr->numCol +
1308 raidPtr->numSpare)) {
1309 RF_Free(clabel, sizeof(*clabel));
1310 return EINVAL;
1311 }
1312
1313 RF_Free(clabel, sizeof(*clabel));
1314
1315 clabel = raidget_component_label(raidPtr, column);
1316
1317 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1318
1319 #if 0
1320 case RAIDFRAME_SET_COMPONENT_LABEL:
1321 clabel = (RF_ComponentLabel_t *) data;
1322
1323 /* XXX check the label for valid stuff... */
1324 /* Note that some things *should not* get modified --
1325 the user should be re-initing the labels instead of
1326 trying to patch things.
1327 */
1328
1329 raidid = raidPtr->raidid;
1330 #ifdef DEBUG
1331 printf("raid%d: Got component label:\n", raidid);
1332 printf("raid%d: Version: %d\n", raidid, clabel->version);
1333 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1334 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1335 printf("raid%d: Column: %d\n", raidid, clabel->column);
1336 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1337 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1338 printf("raid%d: Status: %d\n", raidid, clabel->status);
1339 #endif
1340 clabel->row = 0;
1341 column = clabel->column;
1342
1343 if ((column < 0) || (column >= raidPtr->numCol)) {
1344 return(EINVAL);
1345 }
1346
1347 /* XXX this isn't allowed to do anything for now :-) */
1348
1349 /* XXX and before it is, we need to fill in the rest
1350 of the fields!?!?!?! */
1351 memcpy(raidget_component_label(raidPtr, column),
1352 clabel, sizeof(*clabel));
1353 raidflush_component_label(raidPtr, column);
1354 return (0);
1355 #endif
1356
1357 case RAIDFRAME_INIT_LABELS:
1358 clabel = (RF_ComponentLabel_t *) data;
1359 /*
1360 we only want the serial number from
1361 the above. We get all the rest of the information
1362 from the config that was used to create this RAID
1363 set.
1364 */
1365
1366 raidPtr->serial_number = clabel->serial_number;
1367
1368 for(column=0;column<raidPtr->numCol;column++) {
1369 diskPtr = &raidPtr->Disks[column];
1370 if (!RF_DEAD_DISK(diskPtr->status)) {
1371 ci_label = raidget_component_label(raidPtr,
1372 column);
1373 /* Zeroing this is important. */
1374 memset(ci_label, 0, sizeof(*ci_label));
1375 raid_init_component_label(raidPtr, ci_label);
1376 ci_label->serial_number =
1377 raidPtr->serial_number;
1378 ci_label->row = 0; /* we dont' pretend to support more */
1379 rf_component_label_set_partitionsize(ci_label,
1380 diskPtr->partitionSize);
1381 ci_label->column = column;
1382 raidflush_component_label(raidPtr, column);
1383 }
1384 /* XXXjld what about the spares? */
1385 }
1386
1387 return (retcode);
1388 case RAIDFRAME_SET_AUTOCONFIG:
1389 d = rf_set_autoconfig(raidPtr, *(int *) data);
1390 printf("raid%d: New autoconfig value is: %d\n",
1391 raidPtr->raidid, d);
1392 *(int *) data = d;
1393 return (retcode);
1394
1395 case RAIDFRAME_SET_ROOT:
1396 d = rf_set_rootpartition(raidPtr, *(int *) data);
1397 printf("raid%d: New rootpartition value is: %d\n",
1398 raidPtr->raidid, d);
1399 *(int *) data = d;
1400 return (retcode);
1401
1402 /* initialize all parity */
1403 case RAIDFRAME_REWRITEPARITY:
1404
1405 if (raidPtr->Layout.map->faultsTolerated == 0) {
1406 /* Parity for RAID 0 is trivially correct */
1407 raidPtr->parity_good = RF_RAID_CLEAN;
1408 return(0);
1409 }
1410
1411 if (raidPtr->parity_rewrite_in_progress == 1) {
1412 /* Re-write is already in progress! */
1413 return(EINVAL);
1414 }
1415
1416 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1417 rf_RewriteParityThread,
1418 raidPtr,"raid_parity");
1419 return (retcode);
1420
1421
1422 case RAIDFRAME_ADD_HOT_SPARE:
1423 sparePtr = (RF_SingleComponent_t *) data;
1424 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1425 retcode = rf_add_hot_spare(raidPtr, &component);
1426 return(retcode);
1427
1428 case RAIDFRAME_REMOVE_HOT_SPARE:
1429 return(retcode);
1430
1431 case RAIDFRAME_DELETE_COMPONENT:
1432 componentPtr = (RF_SingleComponent_t *)data;
1433 memcpy( &component, componentPtr,
1434 sizeof(RF_SingleComponent_t));
1435 retcode = rf_delete_component(raidPtr, &component);
1436 return(retcode);
1437
1438 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1439 componentPtr = (RF_SingleComponent_t *)data;
1440 memcpy( &component, componentPtr,
1441 sizeof(RF_SingleComponent_t));
1442 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1443 return(retcode);
1444
1445 case RAIDFRAME_REBUILD_IN_PLACE:
1446
1447 if (raidPtr->Layout.map->faultsTolerated == 0) {
1448 /* Can't do this on a RAID 0!! */
1449 return(EINVAL);
1450 }
1451
1452 if (raidPtr->recon_in_progress == 1) {
1453 /* a reconstruct is already in progress! */
1454 return(EINVAL);
1455 }
1456
1457 componentPtr = (RF_SingleComponent_t *) data;
1458 memcpy( &component, componentPtr,
1459 sizeof(RF_SingleComponent_t));
1460 component.row = 0; /* we don't support any more */
1461 column = component.column;
1462
1463 if ((column < 0) || (column >= raidPtr->numCol)) {
1464 return(EINVAL);
1465 }
1466
1467 rf_lock_mutex2(raidPtr->mutex);
1468 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1469 (raidPtr->numFailures > 0)) {
1470 /* XXX 0 above shouldn't be constant!!! */
1471 /* some component other than this has failed.
1472 Let's not make things worse than they already
1473 are... */
1474 printf("raid%d: Unable to reconstruct to disk at:\n",
1475 raidPtr->raidid);
1476 printf("raid%d: Col: %d Too many failures.\n",
1477 raidPtr->raidid, column);
1478 rf_unlock_mutex2(raidPtr->mutex);
1479 return (EINVAL);
1480 }
1481 if (raidPtr->Disks[column].status ==
1482 rf_ds_reconstructing) {
1483 printf("raid%d: Unable to reconstruct to disk at:\n",
1484 raidPtr->raidid);
1485 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1486
1487 rf_unlock_mutex2(raidPtr->mutex);
1488 return (EINVAL);
1489 }
1490 if (raidPtr->Disks[column].status == rf_ds_spared) {
1491 rf_unlock_mutex2(raidPtr->mutex);
1492 return (EINVAL);
1493 }
1494 rf_unlock_mutex2(raidPtr->mutex);
1495
1496 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1497 if (rrcopy == NULL)
1498 return(ENOMEM);
1499
1500 rrcopy->raidPtr = (void *) raidPtr;
1501 rrcopy->col = column;
1502
1503 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1504 rf_ReconstructInPlaceThread,
1505 rrcopy,"raid_reconip");
1506 return(retcode);
1507
1508 case RAIDFRAME_GET_INFO:
1509 if (!raidPtr->valid)
1510 return (ENODEV);
1511 ucfgp = (RF_DeviceConfig_t **) data;
1512 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1513 (RF_DeviceConfig_t *));
1514 if (d_cfg == NULL)
1515 return (ENOMEM);
1516 d_cfg->rows = 1; /* there is only 1 row now */
1517 d_cfg->cols = raidPtr->numCol;
1518 d_cfg->ndevs = raidPtr->numCol;
1519 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1520 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1521 return (ENOMEM);
1522 }
1523 d_cfg->nspares = raidPtr->numSpare;
1524 if (d_cfg->nspares >= RF_MAX_DISKS) {
1525 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1526 return (ENOMEM);
1527 }
1528 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1529 d = 0;
1530 for (j = 0; j < d_cfg->cols; j++) {
1531 d_cfg->devs[d] = raidPtr->Disks[j];
1532 d++;
1533 }
1534 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1535 d_cfg->spares[i] = raidPtr->Disks[j];
1536 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1537 /* XXX: raidctl(8) expects to see this as a used spare */
1538 d_cfg->spares[i].status = rf_ds_used_spare;
1539 }
1540 }
1541 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1542 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1543
1544 return (retcode);
1545
1546 case RAIDFRAME_CHECK_PARITY:
1547 *(int *) data = raidPtr->parity_good;
1548 return (0);
1549
1550 case RAIDFRAME_PARITYMAP_STATUS:
1551 if (rf_paritymap_ineligible(raidPtr))
1552 return EINVAL;
1553 rf_paritymap_status(raidPtr->parity_map,
1554 (struct rf_pmstat *)data);
1555 return 0;
1556
1557 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1558 if (rf_paritymap_ineligible(raidPtr))
1559 return EINVAL;
1560 if (raidPtr->parity_map == NULL)
1561 return ENOENT; /* ??? */
1562 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1563 (struct rf_pmparams *)data, 1))
1564 return EINVAL;
1565 return 0;
1566
1567 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1568 if (rf_paritymap_ineligible(raidPtr))
1569 return EINVAL;
1570 *(int *) data = rf_paritymap_get_disable(raidPtr);
1571 return 0;
1572
1573 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1574 if (rf_paritymap_ineligible(raidPtr))
1575 return EINVAL;
1576 rf_paritymap_set_disable(raidPtr, *(int *)data);
1577 /* XXX should errors be passed up? */
1578 return 0;
1579
1580 case RAIDFRAME_RESET_ACCTOTALS:
1581 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1582 return (0);
1583
1584 case RAIDFRAME_GET_ACCTOTALS:
1585 totals = (RF_AccTotals_t *) data;
1586 *totals = raidPtr->acc_totals;
1587 return (0);
1588
1589 case RAIDFRAME_KEEP_ACCTOTALS:
1590 raidPtr->keep_acc_totals = *(int *)data;
1591 return (0);
1592
1593 case RAIDFRAME_GET_SIZE:
1594 *(int *) data = raidPtr->totalSectors;
1595 return (0);
1596
1597 /* fail a disk & optionally start reconstruction */
1598 case RAIDFRAME_FAIL_DISK:
1599
1600 if (raidPtr->Layout.map->faultsTolerated == 0) {
1601 /* Can't do this on a RAID 0!! */
1602 return(EINVAL);
1603 }
1604
1605 rr = (struct rf_recon_req *) data;
1606 rr->row = 0;
1607 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1608 return (EINVAL);
1609
1610
1611 rf_lock_mutex2(raidPtr->mutex);
1612 if (raidPtr->status == rf_rs_reconstructing) {
1613 /* you can't fail a disk while we're reconstructing! */
1614 /* XXX wrong for RAID6 */
1615 rf_unlock_mutex2(raidPtr->mutex);
1616 return (EINVAL);
1617 }
1618 if ((raidPtr->Disks[rr->col].status ==
1619 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1620 /* some other component has failed. Let's not make
1621 things worse. XXX wrong for RAID6 */
1622 rf_unlock_mutex2(raidPtr->mutex);
1623 return (EINVAL);
1624 }
1625 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1626 /* Can't fail a spared disk! */
1627 rf_unlock_mutex2(raidPtr->mutex);
1628 return (EINVAL);
1629 }
1630 rf_unlock_mutex2(raidPtr->mutex);
1631
1632 /* make a copy of the recon request so that we don't rely on
1633 * the user's buffer */
1634 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1635 if (rrcopy == NULL)
1636 return(ENOMEM);
1637 memcpy(rrcopy, rr, sizeof(*rr));
1638 rrcopy->raidPtr = (void *) raidPtr;
1639
1640 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1641 rf_ReconThread,
1642 rrcopy,"raid_recon");
1643 return (0);
1644
1645 /* invoke a copyback operation after recon on whatever disk
1646 * needs it, if any */
1647 case RAIDFRAME_COPYBACK:
1648
1649 if (raidPtr->Layout.map->faultsTolerated == 0) {
1650 /* This makes no sense on a RAID 0!! */
1651 return(EINVAL);
1652 }
1653
1654 if (raidPtr->copyback_in_progress == 1) {
1655 /* Copyback is already in progress! */
1656 return(EINVAL);
1657 }
1658
1659 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1660 rf_CopybackThread,
1661 raidPtr,"raid_copyback");
1662 return (retcode);
1663
1664 /* return the percentage completion of reconstruction */
1665 case RAIDFRAME_CHECK_RECON_STATUS:
1666 if (raidPtr->Layout.map->faultsTolerated == 0) {
1667 /* This makes no sense on a RAID 0, so tell the
1668 user it's done. */
1669 *(int *) data = 100;
1670 return(0);
1671 }
1672 if (raidPtr->status != rf_rs_reconstructing)
1673 *(int *) data = 100;
1674 else {
1675 if (raidPtr->reconControl->numRUsTotal > 0) {
1676 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1677 } else {
1678 *(int *) data = 0;
1679 }
1680 }
1681 return (0);
1682 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1683 progressInfoPtr = (RF_ProgressInfo_t **) data;
1684 if (raidPtr->status != rf_rs_reconstructing) {
1685 progressInfo.remaining = 0;
1686 progressInfo.completed = 100;
1687 progressInfo.total = 100;
1688 } else {
1689 progressInfo.total =
1690 raidPtr->reconControl->numRUsTotal;
1691 progressInfo.completed =
1692 raidPtr->reconControl->numRUsComplete;
1693 progressInfo.remaining = progressInfo.total -
1694 progressInfo.completed;
1695 }
1696 retcode = copyout(&progressInfo, *progressInfoPtr,
1697 sizeof(RF_ProgressInfo_t));
1698 return (retcode);
1699
1700 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1701 if (raidPtr->Layout.map->faultsTolerated == 0) {
1702 /* This makes no sense on a RAID 0, so tell the
1703 user it's done. */
1704 *(int *) data = 100;
1705 return(0);
1706 }
1707 if (raidPtr->parity_rewrite_in_progress == 1) {
1708 *(int *) data = 100 *
1709 raidPtr->parity_rewrite_stripes_done /
1710 raidPtr->Layout.numStripe;
1711 } else {
1712 *(int *) data = 100;
1713 }
1714 return (0);
1715
1716 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1717 progressInfoPtr = (RF_ProgressInfo_t **) data;
1718 if (raidPtr->parity_rewrite_in_progress == 1) {
1719 progressInfo.total = raidPtr->Layout.numStripe;
1720 progressInfo.completed =
1721 raidPtr->parity_rewrite_stripes_done;
1722 progressInfo.remaining = progressInfo.total -
1723 progressInfo.completed;
1724 } else {
1725 progressInfo.remaining = 0;
1726 progressInfo.completed = 100;
1727 progressInfo.total = 100;
1728 }
1729 retcode = copyout(&progressInfo, *progressInfoPtr,
1730 sizeof(RF_ProgressInfo_t));
1731 return (retcode);
1732
1733 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1734 if (raidPtr->Layout.map->faultsTolerated == 0) {
1735 /* This makes no sense on a RAID 0 */
1736 *(int *) data = 100;
1737 return(0);
1738 }
1739 if (raidPtr->copyback_in_progress == 1) {
1740 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1741 raidPtr->Layout.numStripe;
1742 } else {
1743 *(int *) data = 100;
1744 }
1745 return (0);
1746
1747 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1748 progressInfoPtr = (RF_ProgressInfo_t **) data;
1749 if (raidPtr->copyback_in_progress == 1) {
1750 progressInfo.total = raidPtr->Layout.numStripe;
1751 progressInfo.completed =
1752 raidPtr->copyback_stripes_done;
1753 progressInfo.remaining = progressInfo.total -
1754 progressInfo.completed;
1755 } else {
1756 progressInfo.remaining = 0;
1757 progressInfo.completed = 100;
1758 progressInfo.total = 100;
1759 }
1760 retcode = copyout(&progressInfo, *progressInfoPtr,
1761 sizeof(RF_ProgressInfo_t));
1762 return (retcode);
1763
1764 /* the sparetable daemon calls this to wait for the kernel to
1765 * need a spare table. this ioctl does not return until a
1766 * spare table is needed. XXX -- calling mpsleep here in the
1767 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1768 * -- I should either compute the spare table in the kernel,
1769 * or have a different -- XXX XXX -- interface (a different
1770 * character device) for delivering the table -- XXX */
1771 #if 0
1772 case RAIDFRAME_SPARET_WAIT:
1773 rf_lock_mutex2(rf_sparet_wait_mutex);
1774 while (!rf_sparet_wait_queue)
1775 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1776 waitreq = rf_sparet_wait_queue;
1777 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1778 rf_unlock_mutex2(rf_sparet_wait_mutex);
1779
1780 /* structure assignment */
1781 *((RF_SparetWait_t *) data) = *waitreq;
1782
1783 RF_Free(waitreq, sizeof(*waitreq));
1784 return (0);
1785
1786 /* wakes up a process waiting on SPARET_WAIT and puts an error
1787 * code in it that will cause the dameon to exit */
1788 case RAIDFRAME_ABORT_SPARET_WAIT:
1789 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1790 waitreq->fcol = -1;
1791 rf_lock_mutex2(rf_sparet_wait_mutex);
1792 waitreq->next = rf_sparet_wait_queue;
1793 rf_sparet_wait_queue = waitreq;
1794 rf_broadcast_conf2(rf_sparet_wait_cv);
1795 rf_unlock_mutex2(rf_sparet_wait_mutex);
1796 return (0);
1797
1798 /* used by the spare table daemon to deliver a spare table
1799 * into the kernel */
1800 case RAIDFRAME_SEND_SPARET:
1801
1802 /* install the spare table */
1803 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1804
1805 /* respond to the requestor. the return status of the spare
1806 * table installation is passed in the "fcol" field */
1807 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1808 waitreq->fcol = retcode;
1809 rf_lock_mutex2(rf_sparet_wait_mutex);
1810 waitreq->next = rf_sparet_resp_queue;
1811 rf_sparet_resp_queue = waitreq;
1812 rf_broadcast_cond2(rf_sparet_resp_cv);
1813 rf_unlock_mutex2(rf_sparet_wait_mutex);
1814
1815 return (retcode);
1816 #endif
1817
1818 default:
1819 break; /* fall through to the os-specific code below */
1820
1821 }
1822
1823 if (!raidPtr->valid)
1824 return (EINVAL);
1825
1826 /*
1827 * Add support for "regular" device ioctls here.
1828 */
1829
1830 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1831 if (error != EPASSTHROUGH)
1832 return (error);
1833
1834 switch (cmd) {
1835 case DIOCGDINFO:
1836 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1837 break;
1838 #ifdef __HAVE_OLD_DISKLABEL
1839 case ODIOCGDINFO:
1840 newlabel = *(rs->sc_dkdev.dk_label);
1841 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1842 return ENOTTY;
1843 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1844 break;
1845 #endif
1846
1847 case DIOCGPART:
1848 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1849 ((struct partinfo *) data)->part =
1850 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1851 break;
1852
1853 case DIOCWDINFO:
1854 case DIOCSDINFO:
1855 #ifdef __HAVE_OLD_DISKLABEL
1856 case ODIOCWDINFO:
1857 case ODIOCSDINFO:
1858 #endif
1859 {
1860 struct disklabel *lp;
1861 #ifdef __HAVE_OLD_DISKLABEL
1862 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1863 memset(&newlabel, 0, sizeof newlabel);
1864 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1865 lp = &newlabel;
1866 } else
1867 #endif
1868 lp = (struct disklabel *)data;
1869
1870 if ((error = raidlock(rs)) != 0)
1871 return (error);
1872
1873 rs->sc_flags |= RAIDF_LABELLING;
1874
1875 error = setdisklabel(rs->sc_dkdev.dk_label,
1876 lp, 0, rs->sc_dkdev.dk_cpulabel);
1877 if (error == 0) {
1878 if (cmd == DIOCWDINFO
1879 #ifdef __HAVE_OLD_DISKLABEL
1880 || cmd == ODIOCWDINFO
1881 #endif
1882 )
1883 error = writedisklabel(RAIDLABELDEV(dev),
1884 raidstrategy, rs->sc_dkdev.dk_label,
1885 rs->sc_dkdev.dk_cpulabel);
1886 }
1887 rs->sc_flags &= ~RAIDF_LABELLING;
1888
1889 raidunlock(rs);
1890
1891 if (error)
1892 return (error);
1893 break;
1894 }
1895
1896 case DIOCWLABEL:
1897 if (*(int *) data != 0)
1898 rs->sc_flags |= RAIDF_WLABEL;
1899 else
1900 rs->sc_flags &= ~RAIDF_WLABEL;
1901 break;
1902
1903 case DIOCGDEFLABEL:
1904 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1905 break;
1906
1907 #ifdef __HAVE_OLD_DISKLABEL
1908 case ODIOCGDEFLABEL:
1909 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1910 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1911 return ENOTTY;
1912 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1913 break;
1914 #endif
1915
1916 case DIOCAWEDGE:
1917 case DIOCDWEDGE:
1918 dkw = (void *)data;
1919
1920 /* If the ioctl happens here, the parent is us. */
1921 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1922 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1923
1924 case DIOCLWEDGES:
1925 return dkwedge_list(&rs->sc_dkdev,
1926 (struct dkwedge_list *)data, l);
1927 case DIOCMWEDGES:
1928 dkwedge_discover(&rs->sc_dkdev);
1929 return 0;
1930 case DIOCCACHESYNC:
1931 return rf_sync_component_caches(raidPtr);
1932
1933 case DIOCGSTRATEGY:
1934 {
1935 struct disk_strategy *dks = (void *)data;
1936
1937 s = splbio();
1938 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1939 sizeof(dks->dks_name));
1940 splx(s);
1941 dks->dks_paramlen = 0;
1942
1943 return 0;
1944 }
1945
1946 case DIOCSSTRATEGY:
1947 {
1948 struct disk_strategy *dks = (void *)data;
1949 struct bufq_state *new;
1950 struct bufq_state *old;
1951
1952 if (dks->dks_param != NULL) {
1953 return EINVAL;
1954 }
1955 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1956 error = bufq_alloc(&new, dks->dks_name,
1957 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1958 if (error) {
1959 return error;
1960 }
1961 s = splbio();
1962 old = rs->buf_queue;
1963 bufq_move(new, old);
1964 rs->buf_queue = new;
1965 splx(s);
1966 bufq_free(old);
1967
1968 return 0;
1969 }
1970
1971 default:
1972 retcode = ENOTTY;
1973 }
1974 return (retcode);
1975
1976 }
1977
1978
1979 /* raidinit -- complete the rest of the initialization for the
1980 RAIDframe device. */
1981
1982
1983 static void
1984 raidinit(struct raid_softc *rs)
1985 {
1986 cfdata_t cf;
1987 int unit;
1988 RF_Raid_t *raidPtr = &rs->sc_r;
1989
1990 unit = raidPtr->raidid;
1991
1992
1993 /* XXX should check return code first... */
1994 rs->sc_flags |= RAIDF_INITED;
1995
1996 /* XXX doesn't check bounds. */
1997 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1998
1999 /* attach the pseudo device */
2000 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
2001 cf->cf_name = raid_cd.cd_name;
2002 cf->cf_atname = raid_cd.cd_name;
2003 cf->cf_unit = unit;
2004 cf->cf_fstate = FSTATE_STAR;
2005
2006 rs->sc_dev = config_attach_pseudo(cf);
2007
2008 if (rs->sc_dev == NULL) {
2009 printf("raid%d: config_attach_pseudo failed\n",
2010 raidPtr->raidid);
2011 rs->sc_flags &= ~RAIDF_INITED;
2012 free(cf, M_RAIDFRAME);
2013 return;
2014 }
2015
2016 /* disk_attach actually creates space for the CPU disklabel, among
2017 * other things, so it's critical to call this *BEFORE* we try putzing
2018 * with disklabels. */
2019
2020 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
2021 disk_attach(&rs->sc_dkdev);
2022
2023 /* XXX There may be a weird interaction here between this, and
2024 * protectedSectors, as used in RAIDframe. */
2025
2026 rs->sc_size = raidPtr->totalSectors;
2027
2028 rf_set_geometry(rs, raidPtr);
2029
2030 dkwedge_discover(&rs->sc_dkdev);
2031
2032 }
2033 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2034 /* wake up the daemon & tell it to get us a spare table
2035 * XXX
2036 * the entries in the queues should be tagged with the raidPtr
2037 * so that in the extremely rare case that two recons happen at once,
2038 * we know for which device were requesting a spare table
2039 * XXX
2040 *
2041 * XXX This code is not currently used. GO
2042 */
2043 int
2044 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2045 {
2046 int retcode;
2047
2048 rf_lock_mutex2(rf_sparet_wait_mutex);
2049 req->next = rf_sparet_wait_queue;
2050 rf_sparet_wait_queue = req;
2051 rf_broadcast_cond2(rf_sparet_wait_cv);
2052
2053 /* mpsleep unlocks the mutex */
2054 while (!rf_sparet_resp_queue) {
2055 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2056 }
2057 req = rf_sparet_resp_queue;
2058 rf_sparet_resp_queue = req->next;
2059 rf_unlock_mutex2(rf_sparet_wait_mutex);
2060
2061 retcode = req->fcol;
2062 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2063 * alloc'd */
2064 return (retcode);
2065 }
2066 #endif
2067
2068 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2069 * bp & passes it down.
2070 * any calls originating in the kernel must use non-blocking I/O
2071 * do some extra sanity checking to return "appropriate" error values for
2072 * certain conditions (to make some standard utilities work)
2073 *
2074 * Formerly known as: rf_DoAccessKernel
2075 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Label update must run without the mutex held. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* NOTE: raidPtr->mutex is held at the top of every iteration and
	 * dropped before touching the buffer queue or issuing I/O; the
	 * early return below therefore exits with the mutex released. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sector units, then
		 * add the partition offset (raw partition needs none). */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb accounts for a trailing partial sector; sum is the
		 * exclusive end of the request in RAID sectors. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* the "1 ||" makes this branch unconditional; db1_printf is
		 * presumably compiled out unless debugging is enabled --
		 * TODO confirm */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the set; the extra
		 * comparisons catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject requests that are not sector-aligned in length. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this request. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* Submission failed; fail the buffer here since the
			 * completion path will not run for it. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2193
2194
2195
2196
2197 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2198
/*
 * Dispatch one low-level I/O request to the component device behind
 * `queue'.  Called with the disk queue locked; the lock is dropped
 * around bdev_strategy() (which may block) and retaken before return.
 * Completion is reported asynchronously via KernelWakeupFunc().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* No real I/O is issued: fake an immediate completion. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up bp for the component device; KernelWakeupFunc()
		 * will run from biodone() with req in bp->b_private. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2272 /* this is the callback function associated with a I/O invoked from
2273 kernel code.
2274 */
/*
 * Completion callback (bp->b_iodone) for component I/O issued by
 * rf_DispatchKernelIO().  Runs from biodone() context.  On I/O error it
 * marks the component as failed (at most once, and only if the set can
 * survive the failure), then hands the request to the raidio thread via
 * the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/rf_DispatchKernelIO(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2340
2341
2342 /*
2343 * initialize a buf structure for doing an I/O in the kernel.
2344 */
2345 static void
2346 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2347 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2348 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2349 struct proc *b_proc)
2350 {
2351 /* bp->b_flags = B_PHYS | rw_flag; */
2352 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2353 bp->b_oflags = 0;
2354 bp->b_cflags = 0;
2355 bp->b_bcount = numSect << logBytesPerSector;
2356 bp->b_bufsize = bp->b_bcount;
2357 bp->b_error = 0;
2358 bp->b_dev = dev;
2359 bp->b_data = bf;
2360 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2361 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2362 if (bp->b_bcount == 0) {
2363 panic("bp->b_bcount is zero in InitBP!!");
2364 }
2365 bp->b_proc = b_proc;
2366 bp->b_iodone = cbFunc;
2367 bp->b_private = cbArg;
2368 }
2369
2370 static void
2371 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2372 struct disklabel *lp)
2373 {
2374 memset(lp, 0, sizeof(*lp));
2375
2376 /* fabricate a label... */
2377 if (raidPtr->totalSectors > UINT32_MAX)
2378 lp->d_secperunit = UINT32_MAX;
2379 else
2380 lp->d_secperunit = raidPtr->totalSectors;
2381 lp->d_secsize = raidPtr->bytesPerSector;
2382 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2383 lp->d_ntracks = 4 * raidPtr->numCol;
2384 lp->d_ncylinders = raidPtr->totalSectors /
2385 (lp->d_nsectors * lp->d_ntracks);
2386 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2387
2388 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2389 lp->d_type = DTYPE_RAID;
2390 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2391 lp->d_rpm = 3600;
2392 lp->d_interleave = 1;
2393 lp->d_flags = 0;
2394
2395 lp->d_partitions[RAW_PART].p_offset = 0;
2396 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2397 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2398 lp->d_npartitions = RAW_PART + 1;
2399
2400 lp->d_magic = DISKMAGIC;
2401 lp->d_magic2 = DISKMAGIC;
2402 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2403
2404 }
2405 /*
2406 * Read the disklabel from the raid device. If one is not present, fake one
2407 * up.
2408 */
/*
 * Read the disklabel from the raid device.  If one is not present, fake
 * one up.  Also sanity-checks a found label against the current size of
 * the RAID set, warning (but not failing) on mismatches.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Start from a fabricated default in case none is on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		/* Labels clamp d_secperunit at UINT32_MAX, so only require
		 * exact equality when the size fits in 32 bits. */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}
	}

}
2471 /*
2472 * Take care of things one might want to take care of in the event
2473 * that a disklabel isn't present.
2474 */
2475 static void
2476 raidmakedisklabel(struct raid_softc *rs)
2477 {
2478 struct disklabel *lp = rs->sc_dkdev.dk_label;
2479 db1_printf(("Making a label..\n"));
2480
2481 /*
2482 * For historical reasons, if there's no disklabel present
2483 * the raw partition must be marked FS_BSDFFS.
2484 */
2485
2486 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2487
2488 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2489
2490 lp->d_checksum = dkcksum(lp);
2491 }
2492 /*
2493 * Wait interruptibly for an exclusive lock.
2494 *
2495 * XXX
2496 * Several drivers do this; it should be abstracted and made MP-safe.
2497 * (Hmm... where have we seen this warning before :-> GO )
2498 */
2499 static int
2500 raidlock(struct raid_softc *rs)
2501 {
2502 int error;
2503
2504 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2505 rs->sc_flags |= RAIDF_WANTED;
2506 if ((error =
2507 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2508 return (error);
2509 }
2510 rs->sc_flags |= RAIDF_LOCKED;
2511 return (0);
2512 }
2513 /*
2514 * Unlock and wake up any waiters.
2515 */
2516 static void
2517 raidunlock(struct raid_softc *rs)
2518 {
2519
2520 rs->sc_flags &= ~RAIDF_LOCKED;
2521 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2522 rs->sc_flags &= ~RAIDF_WANTED;
2523 wakeup(rs);
2524 }
2525 }
2526
2527
2528 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2529 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2530 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2531
2532 static daddr_t
2533 rf_component_info_offset(void)
2534 {
2535
2536 return RF_COMPONENT_INFO_OFFSET;
2537 }
2538
2539 static daddr_t
2540 rf_component_info_size(unsigned secsize)
2541 {
2542 daddr_t info_size;
2543
2544 KASSERT(secsize);
2545 if (secsize > RF_COMPONENT_INFO_SIZE)
2546 info_size = secsize;
2547 else
2548 info_size = RF_COMPONENT_INFO_SIZE;
2549
2550 return info_size;
2551 }
2552
2553 static daddr_t
2554 rf_parity_map_offset(RF_Raid_t *raidPtr)
2555 {
2556 daddr_t map_offset;
2557
2558 KASSERT(raidPtr->bytesPerSector);
2559 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2560 map_offset = raidPtr->bytesPerSector;
2561 else
2562 map_offset = RF_COMPONENT_INFO_SIZE;
2563 map_offset += rf_component_info_offset();
2564
2565 return map_offset;
2566 }
2567
2568 static daddr_t
2569 rf_parity_map_size(RF_Raid_t *raidPtr)
2570 {
2571 daddr_t map_size;
2572
2573 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2574 map_size = raidPtr->bytesPerSector;
2575 else
2576 map_size = RF_PARITY_MAP_SIZE;
2577
2578 return map_size;
2579 }
2580
2581 int
2582 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2583 {
2584 RF_ComponentLabel_t *clabel;
2585
2586 clabel = raidget_component_label(raidPtr, col);
2587 clabel->clean = RF_RAID_CLEAN;
2588 raidflush_component_label(raidPtr, col);
2589 return(0);
2590 }
2591
2592
2593 int
2594 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2595 {
2596 RF_ComponentLabel_t *clabel;
2597
2598 clabel = raidget_component_label(raidPtr, col);
2599 clabel->clean = RF_RAID_DIRTY;
2600 raidflush_component_label(raidPtr, col);
2601 return(0);
2602 }
2603
/*
 * Read column `col's component label from disk into the in-core copy
 * kept in raid_cinfo[].  Returns 0 on success or an errno from the
 * underlying read.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2613
/*
 * Return a pointer to column `col's in-core component label.  The
 * caller may modify it; changes reach disk only via
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2619
2620 int
2621 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2622 {
2623 RF_ComponentLabel_t *label;
2624
2625 label = &raidPtr->raid_cinfo[col].ci_label;
2626 label->mod_counter = raidPtr->mod_counter;
2627 #ifndef RF_NO_PARITY_MAP
2628 label->parity_map_modcount = label->mod_counter;
2629 #endif
2630 return raidwrite_component_label(raidPtr->bytesPerSector,
2631 raidPtr->Disks[col].dev,
2632 raidPtr->raid_cinfo[col].ci_vp, label);
2633 }
2634
2635
/*
 * Read a component label from the given device/vnode into *clabel.
 * Thin wrapper around raidread_component_area() using the standard
 * label offset and (sector-rounded) size.  Returns 0 or an errno.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2645
2646 /* ARGSUSED */
2647 static int
2648 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2649 size_t msize, daddr_t offset, daddr_t dsize)
2650 {
2651 struct buf *bp;
2652 const struct bdevsw *bdev;
2653 int error;
2654
2655 /* XXX should probably ensure that we don't try to do this if
2656 someone has changed rf_protected_sectors. */
2657
2658 if (b_vp == NULL) {
2659 /* For whatever reason, this component is not valid.
2660 Don't try to read a component label from it. */
2661 return(EINVAL);
2662 }
2663
2664 /* get a block of the appropriate size... */
2665 bp = geteblk((int)dsize);
2666 bp->b_dev = dev;
2667
2668 /* get our ducks in a row for the read */
2669 bp->b_blkno = offset / DEV_BSIZE;
2670 bp->b_bcount = dsize;
2671 bp->b_flags |= B_READ;
2672 bp->b_resid = dsize;
2673
2674 bdev = bdevsw_lookup(bp->b_dev);
2675 if (bdev == NULL)
2676 return (ENXIO);
2677 (*bdev->d_strategy)(bp);
2678
2679 error = biowait(bp);
2680
2681 if (!error) {
2682 memcpy(data, bp->b_data, msize);
2683 }
2684
2685 brelse(bp, 0);
2686 return(error);
2687 }
2688
2689
/*
 * Write *clabel to the component label area of the given device/vnode.
 * Thin synchronous wrapper around raidwrite_component_area() using the
 * standard label offset and (sector-rounded) size.  Returns 0 or an
 * errno.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2699
2700 /* ARGSUSED */
2701 static int
2702 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2703 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2704 {
2705 struct buf *bp;
2706 const struct bdevsw *bdev;
2707 int error;
2708
2709 /* get a block of the appropriate size... */
2710 bp = geteblk((int)dsize);
2711 bp->b_dev = dev;
2712
2713 /* get our ducks in a row for the write */
2714 bp->b_blkno = offset / DEV_BSIZE;
2715 bp->b_bcount = dsize;
2716 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2717 bp->b_resid = dsize;
2718
2719 memset(bp->b_data, 0, dsize);
2720 memcpy(bp->b_data, data, msize);
2721
2722 bdev = bdevsw_lookup(bp->b_dev);
2723 if (bdev == NULL)
2724 return (ENXIO);
2725 (*bdev->d_strategy)(bp);
2726 if (asyncp)
2727 return 0;
2728 error = biowait(bp);
2729 brelse(bp, 0);
2730 if (error) {
2731 #if 1
2732 printf("Failed to write RAID component info!\n");
2733 #endif
2734 }
2735
2736 return(error);
2737 }
2738
2739 void
2740 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2741 {
2742 int c;
2743
2744 for (c = 0; c < raidPtr->numCol; c++) {
2745 /* Skip dead disks. */
2746 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2747 continue;
2748 /* XXXjld: what if an error occurs here? */
2749 raidwrite_component_area(raidPtr->Disks[c].dev,
2750 raidPtr->raid_cinfo[c].ci_vp, map,
2751 RF_PARITYMAP_NBYTE,
2752 rf_parity_map_offset(raidPtr),
2753 rf_parity_map_size(raidPtr), 0);
2754 }
2755 }
2756
2757 void
2758 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2759 {
2760 struct rf_paritymap_ondisk tmp;
2761 int c,first;
2762
2763 first=1;
2764 for (c = 0; c < raidPtr->numCol; c++) {
2765 /* Skip dead disks. */
2766 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2767 continue;
2768 raidread_component_area(raidPtr->Disks[c].dev,
2769 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2770 RF_PARITYMAP_NBYTE,
2771 rf_parity_map_offset(raidPtr),
2772 rf_parity_map_size(raidPtr));
2773 if (first) {
2774 memcpy(map, &tmp, sizeof(*map));
2775 first = 0;
2776 } else {
2777 rf_paritymap_merge(map, &tmp);
2778 }
2779 }
2780 }
2781
/*
 * Bump the modification counter and mark the component labels of all
 * live components (and in-use spares) dirty on disk.  Called when the
 * set transitions to a state where parity can no longer be assumed
 * clean.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for.
			 * NOTE(review): if no match is found, scol keeps
			 * its previous value (-1 on the first iteration)
			 * and is written to clabel->row below — confirm
			 * a match is always guaranteed here. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2841
2842
/*
 * Bump the modification counter and rewrite the component labels of all
 * optimal components and in-use spares.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE (shutdown path) and parity is known good,
 * the labels are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced.
			 * NOTE(review): scol stays -1 if no match is
			 * found — confirm a match is guaranteed. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2917
2918 void
2919 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2920 {
2921
2922 if (vp != NULL) {
2923 if (auto_configured == 1) {
2924 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2925 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2926 vput(vp);
2927
2928 } else {
2929 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2930 }
2931 }
2932 }
2933
2934
2935 void
2936 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2937 {
2938 int r,c;
2939 struct vnode *vp;
2940 int acd;
2941
2942
2943 /* We take this opportunity to close the vnodes like we should.. */
2944
2945 for (c = 0; c < raidPtr->numCol; c++) {
2946 vp = raidPtr->raid_cinfo[c].ci_vp;
2947 acd = raidPtr->Disks[c].auto_configured;
2948 rf_close_component(raidPtr, vp, acd);
2949 raidPtr->raid_cinfo[c].ci_vp = NULL;
2950 raidPtr->Disks[c].auto_configured = 0;
2951 }
2952
2953 for (r = 0; r < raidPtr->numSpare; r++) {
2954 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2955 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2956 rf_close_component(raidPtr, vp, acd);
2957 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2958 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2959 }
2960 }
2961
2962
/*
 * Kernel thread body: fail the component named in *req and (optionally,
 * per RF_FDFLAGS_RECON) reconstruct it onto a spare.  Frees *req and
 * exits the thread when done; recon_in_progress brackets the work for
 * observers.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2984
/*
 * Kernel thread body: rewrite all parity for the set.  On success the
 * in-core parity_good state becomes clean (the on-disk clean bits are
 * written at shutdown).  Wakes anyone in waitShutdown blocked on
 * parity_rewrite_in_progress, then exits the thread.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3015
3016
/*
 * Kernel thread body: copy reconstructed data back from spares onto
 * replaced components, then exit the thread.  copyback_in_progress
 * brackets the work for observers.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3031
3032
/*
 * Kernel thread body: reconstruct the component named in *req in place
 * (onto the same disk).  Frees *req and exits the thread when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3050
/*
 * Try to read a RAIDframe component label from the open device vp.  If
 * a reasonable label is found, prepend a new RF_AutoConfig_t (which
 * takes ownership of vp and the label) to ac_list; otherwise close vp
 * and free the label.  Returns the (possibly extended) list, or NULL
 * after freeing the whole list on memory exhaustion.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: release the label buffer and close the vnode,
		 * since no RF_AutoConfig_t took ownership of them. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3108
/*
 * Scan every disk device in the system for RAIDframe components: for
 * each disk, examine its wedge (dk) or its disklabel partitions marked
 * FS_RAID, and fall back to the raw partition when neither yields a
 * component.  Returns a linked list of RF_AutoConfig_t entries (each
 * owning an open vnode and a component label), or NULL if none found.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges (dk) have a flat minor space; other disks use
		 * MAKEDISKDEV to address the raw partition. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* A wedge is checked as a whole: its partition
			 * type must be raidframe. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* rf_get_component takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3290
3291
3292 int
3293 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3294 {
3295
3296 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3297 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3298 ((clabel->clean == RF_RAID_CLEAN) ||
3299 (clabel->clean == RF_RAID_DIRTY)) &&
3300 clabel->row >=0 &&
3301 clabel->column >= 0 &&
3302 clabel->num_rows > 0 &&
3303 clabel->num_columns > 0 &&
3304 clabel->row < clabel->num_rows &&
3305 clabel->column < clabel->num_columns &&
3306 clabel->blockSize > 0 &&
3307 /*
3308 * numBlocksHi may contain garbage, but it is ok since
3309 * the type is unsigned. If it is really garbage,
3310 * rf_fix_old_label_size() will fix it.
3311 */
3312 rf_component_label_numblocks(clabel) > 0) {
3313 /*
3314 * label looks reasonable enough...
3315 * let's make sure it has no old garbage.
3316 */
3317 if (numsecs)
3318 rf_fix_old_label_size(clabel, numsecs);
3319 return(1);
3320 }
3321 return(0);
3322 }
3323
3324
3325 /*
3326 * For reasons yet unknown, some old component labels have garbage in
3327 * the newer numBlocksHi region, and this causes lossage. Since those
3328 * disks will also have numsecs set to less than 32 bits of sectors,
3329 * we can determine when this corruption has occurred, and fix it.
3330 *
3331 * The exact same problem, with the same unknown reason, happens to
3332 * the partitionSizeHi member as well.
3333 */
3334 static void
3335 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3336 {
3337
3338 if (numsecs < ((uint64_t)1 << 32)) {
3339 if (clabel->numBlocksHi) {
3340 printf("WARNING: total sectors < 32 bits, yet "
3341 "numBlocksHi set\n"
3342 "WARNING: resetting numBlocksHi to zero.\n");
3343 clabel->numBlocksHi = 0;
3344 }
3345
3346 if (clabel->partitionSizeHi) {
3347 printf("WARNING: total sectors < 32 bits, yet "
3348 "partitionSizeHi set\n"
3349 "WARNING: resetting partitionSizeHi to zero.\n");
3350 clabel->partitionSizeHi = 0;
3351 }
3352 }
3353 }
3354
3355
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console in
 * human-readable form.  Debug-only helper.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Printable names for root_partition values 0..2; the "& 3"
	   below clamps out-of-range values to "*invalid*". */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	/* full component size, combined from the lo/hi label fields */
	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3389
3390 RF_ConfigSet_t *
3391 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3392 {
3393 RF_AutoConfig_t *ac;
3394 RF_ConfigSet_t *config_sets;
3395 RF_ConfigSet_t *cset;
3396 RF_AutoConfig_t *ac_next;
3397
3398
3399 config_sets = NULL;
3400
3401 /* Go through the AutoConfig list, and figure out which components
3402 belong to what sets. */
3403 ac = ac_list;
3404 while(ac!=NULL) {
3405 /* we're going to putz with ac->next, so save it here
3406 for use at the end of the loop */
3407 ac_next = ac->next;
3408
3409 if (config_sets == NULL) {
3410 /* will need at least this one... */
3411 config_sets = (RF_ConfigSet_t *)
3412 malloc(sizeof(RF_ConfigSet_t),
3413 M_RAIDFRAME, M_NOWAIT);
3414 if (config_sets == NULL) {
3415 panic("rf_create_auto_sets: No memory!");
3416 }
3417 /* this one is easy :) */
3418 config_sets->ac = ac;
3419 config_sets->next = NULL;
3420 config_sets->rootable = 0;
3421 ac->next = NULL;
3422 } else {
3423 /* which set does this component fit into? */
3424 cset = config_sets;
3425 while(cset!=NULL) {
3426 if (rf_does_it_fit(cset, ac)) {
3427 /* looks like it matches... */
3428 ac->next = cset->ac;
3429 cset->ac = ac;
3430 break;
3431 }
3432 cset = cset->next;
3433 }
3434 if (cset==NULL) {
3435 /* didn't find a match above... new set..*/
3436 cset = (RF_ConfigSet_t *)
3437 malloc(sizeof(RF_ConfigSet_t),
3438 M_RAIDFRAME, M_NOWAIT);
3439 if (cset == NULL) {
3440 panic("rf_create_auto_sets: No memory!");
3441 }
3442 cset->ac = ac;
3443 ac->next = NULL;
3444 cset->next = config_sets;
3445 cset->rootable = 0;
3446 config_sets = cset;
3447 }
3448 }
3449 ac = ac_next;
3450 }
3451
3452
3453 return(config_sets);
3454 }
3455
3456 static int
3457 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3458 {
3459 RF_ComponentLabel_t *clabel1, *clabel2;
3460
3461 /* If this one matches the *first* one in the set, that's good
3462 enough, since the other members of the set would have been
3463 through here too... */
3464 /* note that we are not checking partitionSize here..
3465
3466 Note that we are also not checking the mod_counters here.
3467 If everything else matches except the mod_counter, that's
3468 good enough for this test. We will deal with the mod_counters
3469 a little later in the autoconfiguration process.
3470
3471 (clabel1->mod_counter == clabel2->mod_counter) &&
3472
3473 The reason we don't check for this is that failed disks
3474 will have lower modification counts. If those disks are
3475 not added to the set they used to belong to, then they will
3476 form their own set, which may result in 2 different sets,
3477 for example, competing to be configured at raid0, and
3478 perhaps competing to be the root filesystem set. If the
3479 wrong ones get configured, or both attempt to become /,
3480 weird behaviour and or serious lossage will occur. Thus we
3481 need to bring them into the fold here, and kick them out at
3482 a later point.
3483
3484 */
3485
3486 clabel1 = cset->ac->clabel;
3487 clabel2 = ac->clabel;
3488 if ((clabel1->version == clabel2->version) &&
3489 (clabel1->serial_number == clabel2->serial_number) &&
3490 (clabel1->num_rows == clabel2->num_rows) &&
3491 (clabel1->num_columns == clabel2->num_columns) &&
3492 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3493 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3494 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3495 (clabel1->parityConfig == clabel2->parityConfig) &&
3496 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3497 (clabel1->blockSize == clabel2->blockSize) &&
3498 rf_component_label_numblocks(clabel1) ==
3499 rf_component_label_numblocks(clabel2) &&
3500 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3501 (clabel1->root_partition == clabel2->root_partition) &&
3502 (clabel1->last_unit == clabel2->last_unit) &&
3503 (clabel1->config_order == clabel2->config_order)) {
3504 /* if it get's here, it almost *has* to be a match */
3505 } else {
3506 /* it's not consistent with somebody in the set..
3507 punt */
3508 return(0);
3509 }
3510 /* all was fine.. it must fit... */
3511 return(1);
3512 }
3513
/*
 * Check whether config set 'cset' has enough live components (all
 * agreeing on the newest mod_counter) to be configurable.  Returns 1
 * if so, 0 if too many components are missing for the set's RAID
 * level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest counter seen wins: components with a lower one
	   went stale (failed) at some point. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component that occupies it AND
	   carries the winning mod_counter. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd component of a mirror
			   pair without bailing above.. reset the
			   even_pair_failed flag, and go on to the
			   next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   at most one.  (RAID 1 was fully handled in the loop above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3616
3617 void
3618 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3619 RF_Raid_t *raidPtr)
3620 {
3621 RF_ComponentLabel_t *clabel;
3622 int i;
3623
3624 clabel = ac->clabel;
3625
3626 /* 1. Fill in the common stuff */
3627 config->numRow = clabel->num_rows = 1;
3628 config->numCol = clabel->num_columns;
3629 config->numSpare = 0; /* XXX should this be set here? */
3630 config->sectPerSU = clabel->sectPerSU;
3631 config->SUsPerPU = clabel->SUsPerPU;
3632 config->SUsPerRU = clabel->SUsPerRU;
3633 config->parityConfig = clabel->parityConfig;
3634 /* XXX... */
3635 strcpy(config->diskQueueType,"fifo");
3636 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3637 config->layoutSpecificSize = 0; /* XXX ?? */
3638
3639 while(ac!=NULL) {
3640 /* row/col values will be in range due to the checks
3641 in reasonable_label() */
3642 strcpy(config->devnames[0][ac->clabel->column],
3643 ac->devname);
3644 ac = ac->next;
3645 }
3646
3647 for(i=0;i<RF_MAXDBGV;i++) {
3648 config->debugVars[i][0] = 0;
3649 }
3650 }
3651
3652 int
3653 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3654 {
3655 RF_ComponentLabel_t *clabel;
3656 int column;
3657 int sparecol;
3658
3659 raidPtr->autoconfigure = new_value;
3660
3661 for(column=0; column<raidPtr->numCol; column++) {
3662 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3663 clabel = raidget_component_label(raidPtr, column);
3664 clabel->autoconfigure = new_value;
3665 raidflush_component_label(raidPtr, column);
3666 }
3667 }
3668 for(column = 0; column < raidPtr->numSpare ; column++) {
3669 sparecol = raidPtr->numCol + column;
3670 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3671 clabel = raidget_component_label(raidPtr, sparecol);
3672 clabel->autoconfigure = new_value;
3673 raidflush_component_label(raidPtr, sparecol);
3674 }
3675 }
3676 return(new_value);
3677 }
3678
3679 int
3680 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3681 {
3682 RF_ComponentLabel_t *clabel;
3683 int column;
3684 int sparecol;
3685
3686 raidPtr->root_partition = new_value;
3687 for(column=0; column<raidPtr->numCol; column++) {
3688 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3689 clabel = raidget_component_label(raidPtr, column);
3690 clabel->root_partition = new_value;
3691 raidflush_component_label(raidPtr, column);
3692 }
3693 }
3694 for(column = 0; column < raidPtr->numSpare ; column++) {
3695 sparecol = raidPtr->numCol + column;
3696 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3697 clabel = raidget_component_label(raidPtr, sparecol);
3698 clabel->root_partition = new_value;
3699 raidflush_component_label(raidPtr, sparecol);
3700 }
3701 }
3702 return(new_value);
3703 }
3704
3705 void
3706 rf_release_all_vps(RF_ConfigSet_t *cset)
3707 {
3708 RF_AutoConfig_t *ac;
3709
3710 ac = cset->ac;
3711 while(ac!=NULL) {
3712 /* Close the vp, and give it back */
3713 if (ac->vp) {
3714 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3715 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3716 vput(ac->vp);
3717 ac->vp = NULL;
3718 }
3719 ac = ac->next;
3720 }
3721 }
3722
3723
3724 void
3725 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3726 {
3727 RF_AutoConfig_t *ac;
3728 RF_AutoConfig_t *next_ac;
3729
3730 ac = cset->ac;
3731 while(ac!=NULL) {
3732 next_ac = ac->next;
3733 /* nuke the label */
3734 free(ac->clabel, M_RAIDFRAME);
3735 /* cleanup the config structure */
3736 free(ac, M_RAIDFRAME);
3737 /* "next.." */
3738 ac = next_ac;
3739 }
3740 /* and, finally, nuke the config set */
3741 free(cset, M_RAIDFRAME);
3742 }
3743
3744
/*
 * Fill in a component label from the current state of raidPtr.  The
 * caller is responsible for flushing the label out to the component.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* geometry: modern sets are single-row */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout parameters, copied from the in-core layout */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* component size goes through the helper, which splits it
	   across the label's lo/hi fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3777
/*
 * Configure a RAID set described by an autoconfiguration config set.
 * Returns the softc of the newly configured unit, or NULL when
 * allocation or rf_Configure() fails.  On success the set's rootable
 * flag is updated from the label's root_partition preference.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* scan upward from the label's preferred unit until we find a
	   softc that is not already configured (raidget allocates the
	   softc if needed) */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; release the softc we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3851
3852 void
3853 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3854 {
3855 struct buf *bp;
3856 struct raid_softc *rs;
3857
3858 bp = (struct buf *)desc->bp;
3859 rs = desc->raidPtr->softc;
3860 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3861 (bp->b_flags & B_READ));
3862 }
3863
/*
 * Initialize a RAIDframe memory pool: item size 'size', wait channel
 * name 'w_chan', protected at IPL_BIO.  The pool is capped at 'xmax'
 * items, pre-primed with 'xmin' items, and keeps at least 'xmin'
 * around (low-water mark).
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3873
3874 /*
3875 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3876 * if there is IO pending and if that IO could possibly be done for a
3877 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3878 * otherwise.
3879 *
3880 */
3881
3882 int
3883 rf_buf_queue_check(RF_Raid_t *raidPtr)
3884 {
3885 struct raid_softc *rs = raidPtr->softc;
3886 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3887 /* there is work to do */
3888 return 0;
3889 }
3890 /* default is nothing to do */
3891 return 1;
3892 }
3893
3894 int
3895 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3896 {
3897 uint64_t numsecs;
3898 unsigned secsize;
3899 int error;
3900
3901 error = getdisksize(vp, &numsecs, &secsize);
3902 if (error == 0) {
3903 diskPtr->blockSize = secsize;
3904 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3905 diskPtr->partitionSize = numsecs;
3906 return 0;
3907 }
3908 return error;
3909 }
3910
/*
 * Autoconf match hook: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3916
/*
 * Autoconf attach hook: nothing to do here; all real setup happens
 * when a unit is configured (raidinit et al.).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3922
3923
3924 static int
3925 raid_detach(device_t self, int flags)
3926 {
3927 int error;
3928 struct raid_softc *rs = raidget(device_unit(self));
3929
3930 if (rs == NULL)
3931 return ENXIO;
3932
3933 if ((error = raidlock(rs)) != 0)
3934 return (error);
3935
3936 error = raid_detach_unlocked(rs);
3937
3938 raidunlock(rs);
3939
3940 /* XXXkd: raidput(rs) ??? */
3941
3942 return error;
3943 }
3944
/*
 * Publish a synthetic disk geometry for the RAID unit.  Only the
 * total size and sector size are real; sectors-per-track is set to
 * the data sectors per stripe and the track count is an arbitrary
 * multiple of the column count, since a RAID set has no physical
 * geometry of its own.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3959
3960 /*
3961 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3962 * We end up returning whatever error was returned by the first cache flush
3963 * that fails.
3964 */
3965
3966 int
3967 rf_sync_component_caches(RF_Raid_t *raidPtr)
3968 {
3969 int c, sparecol;
3970 int e,error;
3971 int force = 1;
3972
3973 error = 0;
3974 for (c = 0; c < raidPtr->numCol; c++) {
3975 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3976 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3977 &force, FWRITE, NOCRED);
3978 if (e) {
3979 if (e != ENODEV)
3980 printf("raid%d: cache flush to component %s failed.\n",
3981 raidPtr->raidid, raidPtr->Disks[c].devname);
3982 if (error == 0) {
3983 error = e;
3984 }
3985 }
3986 }
3987 }
3988
3989 for( c = 0; c < raidPtr->numSpare ; c++) {
3990 sparecol = raidPtr->numCol + c;
3991 /* Need to ensure that the reconstruct actually completed! */
3992 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3993 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3994 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3995 if (e) {
3996 if (e != ENODEV)
3997 printf("raid%d: cache flush to component %s failed.\n",
3998 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3999 if (error == 0) {
4000 error = e;
4001 }
4002 }
4003 }
4004 }
4005 return error;
4006 }
4007