rf_netbsdkintf.c revision 1.324 1 /* $NetBSD: rf_netbsdkintf.c,v 1.324 2015/07/10 09:49:56 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.324 2015/07/10 09:49:56 mrg Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129
130 #include <prop/proplib.h>
131
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153
154 #ifdef DEBUG
155 int rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else /* DEBUG */
158 #define db1_printf(a) { }
159 #endif /* DEBUG */
160
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165
166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169 * installation process */
170 #endif
171
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178 void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188 daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t, int);
191
192 static int raidwrite_component_label(unsigned,
193 dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196
197
198 static dev_type_open(raidopen);
199 static dev_type_close(raidclose);
200 static dev_type_read(raidread);
201 static dev_type_write(raidwrite);
202 static dev_type_ioctl(raidioctl);
203 static dev_type_strategy(raidstrategy);
204 static dev_type_dump(raiddump);
205 static dev_type_size(raidsize);
206
/* Block-device switch: entry points used for raidN block devices. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character-device switch: raw raidN devices go through physio(). */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the generic disk(9) layer via disk_attach(). */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
237
/*
 * Per-unit software state for one configured (or configuring) RAID set.
 * Instances live on the global `raids' list, protected by raid_lock.
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf(9) device handle */
	int     sc_unit;		/* raid unit number (the N in raidN) */
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* the RAIDframe state proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
250 /* sc_flags */
251 #define RAIDF_INITED 0x01 /* unit has been initialized */
252 #define RAIDF_WLABEL 0x02 /* label area is writable */
253 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
254 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
255 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
256 #define RAIDF_LOCKED 0x80 /* unit is locked */
257
258 #define raidunit(x) DISKUNIT(x)
259
260 extern struct cfdriver raid_cd;
261 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
262 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
263 DVF_DETACH_SHUTDOWN);
264
265 /*
266 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
267 * Be aware that large numbers can allow the driver to consume a lot of
268 * kernel memory, especially on writes, and in degraded mode reads.
269 *
270 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
271 * a single 64K write will typically require 64K for the old data,
272 * 64K for the old parity, and 64K for the new parity, for a total
273 * of 192K (if the parity buffer is not re-used immediately).
274 * Even it if is used immediately, that's still 128K, which when multiplied
275 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
276 *
277 * Now in degraded mode, for example, a 64K read on the above setup may
278 * require data reconstruction, which will require *all* of the 4 remaining
279 * disks to participate -- 4 * 32K/disk == 128K again.
280 */
281
282 #ifndef RAIDOUTSTANDING
283 #define RAIDOUTSTANDING 6
284 #endif
285
286 #define RAIDLABELDEV(dev) \
287 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
288
289 /* declared here, and made public, for the benefit of KVM stuff.. */
290
291 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
292 struct disklabel *);
293 static void raidgetdisklabel(dev_t);
294 static void raidmakedisklabel(struct raid_softc *);
295
296 static int raidlock(struct raid_softc *);
297 static void raidunlock(struct raid_softc *);
298
299 static int raid_detach_unlocked(struct raid_softc *);
300
301 static void rf_markalldirty(RF_Raid_t *);
302 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(device_t);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
322 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
323
324 /*
325 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
326 * Note that this is overridden by having RAID_AUTOCONFIG as an option
327 * in the kernel config file.
328 */
329 #ifdef RAID_AUTOCONFIG
330 int raidautoconfig = 1;
331 #else
332 int raidautoconfig = 0;
333 #endif
334 static bool raidautoconfigdone = false;
335
336 struct RF_Pools_s rf_pools;
337
338 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
339 static kmutex_t raid_lock;
340
/*
 * Allocate and minimally initialize a softc for raid unit `unit'.
 * Returns the new softc, not yet linked onto the global list.
 * NOTE(review): kmem_zalloc() with KM_SLEEP is documented not to
 * fail, so the NULL check below is likely dead code -- confirm
 * before relying on the NULL return path.
 */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	/* FCFS buffer queue, sorted by raw block number, for pending I/O */
	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
	return sc;
}
354
/*
 * Free a softc allocated by raidcreate().  The caller must already
 * have removed it from the global list (see raidput()).
 */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}
360
361 static struct raid_softc *
362 raidget(int unit) {
363 struct raid_softc *sc;
364 if (unit < 0) {
365 #ifdef DIAGNOSTIC
366 panic("%s: unit %d!", __func__, unit);
367 #endif
368 return NULL;
369 }
370 mutex_enter(&raid_lock);
371 LIST_FOREACH(sc, &raids, sc_link) {
372 if (sc->sc_unit == unit) {
373 mutex_exit(&raid_lock);
374 return sc;
375 }
376 }
377 mutex_exit(&raid_lock);
378 if ((sc = raidcreate(unit)) == NULL)
379 return NULL;
380 mutex_enter(&raid_lock);
381 LIST_INSERT_HEAD(&raids, sc, sc_link);
382 mutex_exit(&raid_lock);
383 return sc;
384 }
385
/*
 * Unlink a softc from the global list (under raid_lock) and free it.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
393
/*
 * Driver attach hook, called once at boot.  Initializes the global
 * lock and RAIDframe engine, attaches the autoconf glue, and
 * registers a finalizer so RAID autoconfiguration runs after all
 * real hardware has been found.
 * NOTE(review): the `num' argument (historic pre-allocation count)
 * is unused here -- confirm no MD code depends on it.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table installation handshake state (see rf_SetSpareTable) */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
426
/*
 * Config finalizer: scan all disks for RAID component labels, group
 * them into sets, and configure the auto-configurable ones.  Runs at
 * most once (guarded by raidautoconfigdone); returns 1 if it did the
 * scan, 0 if autoconfiguration is disabled or already done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
464
/*
 * Return non-zero if RAID set `r' has the boot device `bdv' among
 * its components.  Component names ("/dev/wd0a" etc.) are stripped
 * of the "/dev/" prefix; wedge components ("dkN") are translated to
 * their parent disk name.  The comparison is a prefix match so that
 * a component's trailing partition letter ("wd0a" vs. boot device
 * "wd0") still matches.
 * NOTE(review): the prefix match would also accept boot device
 * "wd1" against a component on "wd10" -- confirm unit numbering
 * makes this impossible in practice.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;	/* skip "/dev/" */
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge: compare against the parent disk instead */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
488
/*
 * Walk the list of candidate config sets: configure every set that
 * has enough components and is marked for autoconfiguration, then
 * try to work out which configured set (if any) should become the
 * root device.  Sets that are not configured have their resources
 * released here; every set's bookkeeping is cleaned up regardless.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* how many configured sets claim root */
	struct raid_softc *sc, *rsc;	/* rsc: last root candidate seen */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		/* exactly one candidate: adopt it as root if the raid
		 * forces root (root_partition == 1), or if it contains
		 * the device we actually booted from, or if we have no
		 * boot device at all */
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* several candidates: narrow the field to sets that
		 * actually contain the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
599
/*
 * d_psize entry point: return the size (in DEV_BSIZE units) of the
 * swap partition on this device, or -1 if the unit is not configured
 * or the partition is not of type FS_SWAP.  Temporarily opens the
 * device if nobody has the partition open.
 */
static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* not already open: open it around the label lookup */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}
632
/*
 * d_dump entry point: write a crash dump to the RAID set.  Only
 * RAID 1 layouts (one data column, one parity column) are supported,
 * because each component then holds a complete copy and the dump can
 * go straight to a single live component's block device.
 *
 * blkno is relative to the dump partition; va/size describe the
 * memory to write.  Returns 0 or an errno value.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps are done in whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the raid device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* find which column this spare is standing in for */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
769
/* ARGSUSED */
/*
 * d_open entry point.  Validates the partition, reads the disklabel
 * on first open of a configured unit, records the open in the
 * char/block open masks (which prevents unconfiguration while open),
 * and marks components dirty on the first open of an initialized set.
 * Fails with EBUSY if the unit is shutting down or if wedges exist
 * and a non-raw partition is requested.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured, wedge-less unit: (re)read label */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
855
/* ARGSUSED */
/*
 * d_close entry point.  Clears this partition from the appropriate
 * open mask; on the last close of an initialized unit, writes final
 * ("clean") component labels.  Always returns 0.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
905
/*
 * d_strategy entry point.  Validates the request against the device
 * size (raw partition) or the disklabel (other partitions), then
 * queues the buf on the unit's buffer queue and wakes the RAIDframe
 * I/O thread via iodone_cv.  Errors are reported by completing the
 * buf immediately with b_error set.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* convert totalSectors (native sector size) to DEV_BSIZE
		 * blocks, shifting in whichever direction is needed */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
976
977 /* ARGSUSED */
978 static int
979 raidread(dev_t dev, struct uio *uio, int flags)
980 {
981 int unit = raidunit(dev);
982 struct raid_softc *rs;
983
984 if ((rs = raidget(unit)) == NULL)
985 return ENXIO;
986
987 if ((rs->sc_flags & RAIDF_INITED) == 0)
988 return (ENXIO);
989
990 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
991
992 }
993
994 /* ARGSUSED */
995 static int
996 raidwrite(dev_t dev, struct uio *uio, int flags)
997 {
998 int unit = raidunit(dev);
999 struct raid_softc *rs;
1000
1001 if ((rs = raidget(unit)) == NULL)
1002 return ENXIO;
1003
1004 if ((rs->sc_flags & RAIDF_INITED) == 0)
1005 return (ENXIO);
1006
1007 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1008
1009 }
1010
/*
 * Tear down a RAID unit: shut down the RAIDframe engine (if the unit
 * was initialized) and detach/destroy the generic disk structures.
 * The caller holds the unit lock (raidlock).  Fails with EBUSY if
 * any partition is still open, or with rf_Shutdown()'s error.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
1042
1043 static int
1044 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1045 {
1046 int unit = raidunit(dev);
1047 int error = 0;
1048 int part, pmask, s;
1049 cfdata_t cf;
1050 struct raid_softc *rs;
1051 RF_Config_t *k_cfg, *u_cfg;
1052 RF_Raid_t *raidPtr;
1053 RF_RaidDisk_t *diskPtr;
1054 RF_AccTotals_t *totals;
1055 RF_DeviceConfig_t *d_cfg, **ucfgp;
1056 u_char *specific_buf;
1057 int retcode = 0;
1058 int column;
1059 /* int raidid; */
1060 struct rf_recon_req *rrcopy, *rr;
1061 RF_ComponentLabel_t *clabel;
1062 RF_ComponentLabel_t *ci_label;
1063 RF_ComponentLabel_t **clabel_ptr;
1064 RF_SingleComponent_t *sparePtr,*componentPtr;
1065 RF_SingleComponent_t component;
1066 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1067 int i, j, d;
1068 #ifdef __HAVE_OLD_DISKLABEL
1069 struct disklabel newlabel;
1070 #endif
1071
1072 if ((rs = raidget(unit)) == NULL)
1073 return ENXIO;
1074 raidPtr = &rs->sc_r;
1075
1076 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1077 (int) DISKPART(dev), (int) unit, cmd));
1078
1079 /* Must be open for writes for these commands... */
1080 switch (cmd) {
1081 #ifdef DIOCGSECTORSIZE
1082 case DIOCGSECTORSIZE:
1083 *(u_int *)data = raidPtr->bytesPerSector;
1084 return 0;
1085 case DIOCGMEDIASIZE:
1086 *(off_t *)data =
1087 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1088 return 0;
1089 #endif
1090 case DIOCSDINFO:
1091 case DIOCWDINFO:
1092 #ifdef __HAVE_OLD_DISKLABEL
1093 case ODIOCWDINFO:
1094 case ODIOCSDINFO:
1095 #endif
1096 case DIOCWLABEL:
1097 case DIOCAWEDGE:
1098 case DIOCDWEDGE:
1099 case DIOCMWEDGES:
1100 case DIOCSSTRATEGY:
1101 if ((flag & FWRITE) == 0)
1102 return (EBADF);
1103 }
1104
1105 /* Must be initialized for these... */
1106 switch (cmd) {
1107 case DIOCGDINFO:
1108 case DIOCSDINFO:
1109 case DIOCWDINFO:
1110 #ifdef __HAVE_OLD_DISKLABEL
1111 case ODIOCGDINFO:
1112 case ODIOCWDINFO:
1113 case ODIOCSDINFO:
1114 case ODIOCGDEFLABEL:
1115 #endif
1116 case DIOCGPART:
1117 case DIOCWLABEL:
1118 case DIOCGDEFLABEL:
1119 case DIOCAWEDGE:
1120 case DIOCDWEDGE:
1121 case DIOCLWEDGES:
1122 case DIOCMWEDGES:
1123 case DIOCCACHESYNC:
1124 case RAIDFRAME_SHUTDOWN:
1125 case RAIDFRAME_REWRITEPARITY:
1126 case RAIDFRAME_GET_INFO:
1127 case RAIDFRAME_RESET_ACCTOTALS:
1128 case RAIDFRAME_GET_ACCTOTALS:
1129 case RAIDFRAME_KEEP_ACCTOTALS:
1130 case RAIDFRAME_GET_SIZE:
1131 case RAIDFRAME_FAIL_DISK:
1132 case RAIDFRAME_COPYBACK:
1133 case RAIDFRAME_CHECK_RECON_STATUS:
1134 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1135 case RAIDFRAME_GET_COMPONENT_LABEL:
1136 case RAIDFRAME_SET_COMPONENT_LABEL:
1137 case RAIDFRAME_ADD_HOT_SPARE:
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 case RAIDFRAME_INIT_LABELS:
1140 case RAIDFRAME_REBUILD_IN_PLACE:
1141 case RAIDFRAME_CHECK_PARITY:
1142 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1143 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1144 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1145 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1146 case RAIDFRAME_SET_AUTOCONFIG:
1147 case RAIDFRAME_SET_ROOT:
1148 case RAIDFRAME_DELETE_COMPONENT:
1149 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1150 case RAIDFRAME_PARITYMAP_STATUS:
1151 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1152 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1153 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1154 case DIOCGSTRATEGY:
1155 case DIOCSSTRATEGY:
1156 if ((rs->sc_flags & RAIDF_INITED) == 0)
1157 return (ENXIO);
1158 }
1159
1160 switch (cmd) {
1161 #ifdef COMPAT_50
1162 case RAIDFRAME_GET_INFO50:
1163 return rf_get_info50(raidPtr, data);
1164
1165 case RAIDFRAME_CONFIGURE50:
1166 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1167 return retcode;
1168 goto config;
1169 #endif
1170 /* configure the system */
1171 case RAIDFRAME_CONFIGURE:
1172
1173 if (raidPtr->valid) {
1174 /* There is a valid RAID set running on this unit! */
1175 printf("raid%d: Device already configured!\n",unit);
1176 return(EINVAL);
1177 }
1178
1179 /* copy-in the configuration information */
1180 /* data points to a pointer to the configuration structure */
1181
1182 u_cfg = *((RF_Config_t **) data);
1183 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1184 if (k_cfg == NULL) {
1185 return (ENOMEM);
1186 }
1187 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1188 if (retcode) {
1189 RF_Free(k_cfg, sizeof(RF_Config_t));
1190 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1191 retcode));
1192 return (retcode);
1193 }
1194 goto config;
1195 config:
1196 /* allocate a buffer for the layout-specific data, and copy it
1197 * in */
1198 if (k_cfg->layoutSpecificSize) {
1199 if (k_cfg->layoutSpecificSize > 10000) {
1200 /* sanity check */
1201 RF_Free(k_cfg, sizeof(RF_Config_t));
1202 return (EINVAL);
1203 }
1204 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1205 (u_char *));
1206 if (specific_buf == NULL) {
1207 RF_Free(k_cfg, sizeof(RF_Config_t));
1208 return (ENOMEM);
1209 }
1210 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1211 k_cfg->layoutSpecificSize);
1212 if (retcode) {
1213 RF_Free(k_cfg, sizeof(RF_Config_t));
1214 RF_Free(specific_buf,
1215 k_cfg->layoutSpecificSize);
1216 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1217 retcode));
1218 return (retcode);
1219 }
1220 } else
1221 specific_buf = NULL;
1222 k_cfg->layoutSpecific = specific_buf;
1223
1224 /* should do some kind of sanity check on the configuration.
1225 * Store the sum of all the bytes in the last byte? */
1226
1227 /* configure the system */
1228
1229 /*
1230 * Clear the entire RAID descriptor, just to make sure
1231 * there is no stale data left in the case of a
1232 * reconfiguration
1233 */
1234 memset(raidPtr, 0, sizeof(*raidPtr));
1235 raidPtr->softc = rs;
1236 raidPtr->raidid = unit;
1237
1238 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1239
1240 if (retcode == 0) {
1241
1242 /* allow this many simultaneous IO's to
1243 this RAID device */
1244 raidPtr->openings = RAIDOUTSTANDING;
1245
1246 raidinit(rs);
1247 rf_markalldirty(raidPtr);
1248 }
1249 /* free the buffers. No return code here. */
1250 if (k_cfg->layoutSpecificSize) {
1251 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1252 }
1253 RF_Free(k_cfg, sizeof(RF_Config_t));
1254
1255 return (retcode);
1256
1257 /* shutdown the system */
1258 case RAIDFRAME_SHUTDOWN:
1259
1260 part = DISKPART(dev);
1261 pmask = (1 << part);
1262
1263 if ((error = raidlock(rs)) != 0)
1264 return (error);
1265
1266 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1267 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1268 (rs->sc_dkdev.dk_copenmask & pmask)))
1269 retcode = EBUSY;
1270 else {
1271 rs->sc_flags |= RAIDF_SHUTDOWN;
1272 rs->sc_dkdev.dk_copenmask &= ~pmask;
1273 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1274 rs->sc_dkdev.dk_openmask &= ~pmask;
1275 retcode = 0;
1276 }
1277
1278 raidunlock(rs);
1279
1280 if (retcode != 0)
1281 return retcode;
1282
1283 /* free the pseudo device attach bits */
1284
1285 cf = device_cfdata(rs->sc_dev);
1286 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1287 free(cf, M_RAIDFRAME);
1288
1289 return (retcode);
1290 case RAIDFRAME_GET_COMPONENT_LABEL:
1291 clabel_ptr = (RF_ComponentLabel_t **) data;
1292 /* need to read the component label for the disk indicated
1293 by row,column in clabel */
1294
1295 /*
1296 * Perhaps there should be an option to skip the in-core
1297 * copy and hit the disk, as with disklabel(8).
1298 */
1299 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1300
1301 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1302
1303 if (retcode) {
1304 RF_Free(clabel, sizeof(*clabel));
1305 return retcode;
1306 }
1307
1308 clabel->row = 0; /* Don't allow looking at anything else.*/
1309
1310 column = clabel->column;
1311
1312 if ((column < 0) || (column >= raidPtr->numCol +
1313 raidPtr->numSpare)) {
1314 RF_Free(clabel, sizeof(*clabel));
1315 return EINVAL;
1316 }
1317
1318 RF_Free(clabel, sizeof(*clabel));
1319
1320 clabel = raidget_component_label(raidPtr, column);
1321
1322 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1323
1324 #if 0
1325 case RAIDFRAME_SET_COMPONENT_LABEL:
1326 clabel = (RF_ComponentLabel_t *) data;
1327
1328 /* XXX check the label for valid stuff... */
1329 /* Note that some things *should not* get modified --
1330 the user should be re-initing the labels instead of
1331 trying to patch things.
1332 */
1333
1334 raidid = raidPtr->raidid;
1335 #ifdef DEBUG
1336 printf("raid%d: Got component label:\n", raidid);
1337 printf("raid%d: Version: %d\n", raidid, clabel->version);
1338 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1339 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1340 printf("raid%d: Column: %d\n", raidid, clabel->column);
1341 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1342 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1343 printf("raid%d: Status: %d\n", raidid, clabel->status);
1344 #endif
1345 clabel->row = 0;
1346 column = clabel->column;
1347
1348 if ((column < 0) || (column >= raidPtr->numCol)) {
1349 return(EINVAL);
1350 }
1351
1352 /* XXX this isn't allowed to do anything for now :-) */
1353
1354 /* XXX and before it is, we need to fill in the rest
1355 of the fields!?!?!?! */
1356 memcpy(raidget_component_label(raidPtr, column),
1357 clabel, sizeof(*clabel));
1358 raidflush_component_label(raidPtr, column);
1359 return (0);
1360 #endif
1361
1362 case RAIDFRAME_INIT_LABELS:
1363 clabel = (RF_ComponentLabel_t *) data;
1364 /*
1365 we only want the serial number from
1366 the above. We get all the rest of the information
1367 from the config that was used to create this RAID
1368 set.
1369 */
1370
1371 raidPtr->serial_number = clabel->serial_number;
1372
1373 for(column=0;column<raidPtr->numCol;column++) {
1374 diskPtr = &raidPtr->Disks[column];
1375 if (!RF_DEAD_DISK(diskPtr->status)) {
1376 ci_label = raidget_component_label(raidPtr,
1377 column);
1378 /* Zeroing this is important. */
1379 memset(ci_label, 0, sizeof(*ci_label));
1380 raid_init_component_label(raidPtr, ci_label);
1381 ci_label->serial_number =
1382 raidPtr->serial_number;
1383 ci_label->row = 0; /* we dont' pretend to support more */
1384 rf_component_label_set_partitionsize(ci_label,
1385 diskPtr->partitionSize);
1386 ci_label->column = column;
1387 raidflush_component_label(raidPtr, column);
1388 }
1389 /* XXXjld what about the spares? */
1390 }
1391
1392 return (retcode);
1393 case RAIDFRAME_SET_AUTOCONFIG:
1394 d = rf_set_autoconfig(raidPtr, *(int *) data);
1395 printf("raid%d: New autoconfig value is: %d\n",
1396 raidPtr->raidid, d);
1397 *(int *) data = d;
1398 return (retcode);
1399
1400 case RAIDFRAME_SET_ROOT:
1401 d = rf_set_rootpartition(raidPtr, *(int *) data);
1402 printf("raid%d: New rootpartition value is: %d\n",
1403 raidPtr->raidid, d);
1404 *(int *) data = d;
1405 return (retcode);
1406
1407 /* initialize all parity */
1408 case RAIDFRAME_REWRITEPARITY:
1409
1410 if (raidPtr->Layout.map->faultsTolerated == 0) {
1411 /* Parity for RAID 0 is trivially correct */
1412 raidPtr->parity_good = RF_RAID_CLEAN;
1413 return(0);
1414 }
1415
1416 if (raidPtr->parity_rewrite_in_progress == 1) {
1417 /* Re-write is already in progress! */
1418 return(EINVAL);
1419 }
1420
1421 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1422 rf_RewriteParityThread,
1423 raidPtr,"raid_parity");
1424 return (retcode);
1425
1426
1427 case RAIDFRAME_ADD_HOT_SPARE:
1428 sparePtr = (RF_SingleComponent_t *) data;
1429 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1430 retcode = rf_add_hot_spare(raidPtr, &component);
1431 return(retcode);
1432
1433 case RAIDFRAME_REMOVE_HOT_SPARE:
1434 return(retcode);
1435
1436 case RAIDFRAME_DELETE_COMPONENT:
1437 componentPtr = (RF_SingleComponent_t *)data;
1438 memcpy( &component, componentPtr,
1439 sizeof(RF_SingleComponent_t));
1440 retcode = rf_delete_component(raidPtr, &component);
1441 return(retcode);
1442
1443 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1444 componentPtr = (RF_SingleComponent_t *)data;
1445 memcpy( &component, componentPtr,
1446 sizeof(RF_SingleComponent_t));
1447 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1448 return(retcode);
1449
1450 case RAIDFRAME_REBUILD_IN_PLACE:
1451
1452 if (raidPtr->Layout.map->faultsTolerated == 0) {
1453 /* Can't do this on a RAID 0!! */
1454 return(EINVAL);
1455 }
1456
1457 if (raidPtr->recon_in_progress == 1) {
1458 /* a reconstruct is already in progress! */
1459 return(EINVAL);
1460 }
1461
1462 componentPtr = (RF_SingleComponent_t *) data;
1463 memcpy( &component, componentPtr,
1464 sizeof(RF_SingleComponent_t));
1465 component.row = 0; /* we don't support any more */
1466 column = component.column;
1467
1468 if ((column < 0) || (column >= raidPtr->numCol)) {
1469 return(EINVAL);
1470 }
1471
1472 rf_lock_mutex2(raidPtr->mutex);
1473 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1474 (raidPtr->numFailures > 0)) {
1475 /* XXX 0 above shouldn't be constant!!! */
1476 /* some component other than this has failed.
1477 Let's not make things worse than they already
1478 are... */
1479 printf("raid%d: Unable to reconstruct to disk at:\n",
1480 raidPtr->raidid);
1481 printf("raid%d: Col: %d Too many failures.\n",
1482 raidPtr->raidid, column);
1483 rf_unlock_mutex2(raidPtr->mutex);
1484 return (EINVAL);
1485 }
1486 if (raidPtr->Disks[column].status ==
1487 rf_ds_reconstructing) {
1488 printf("raid%d: Unable to reconstruct to disk at:\n",
1489 raidPtr->raidid);
1490 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1491
1492 rf_unlock_mutex2(raidPtr->mutex);
1493 return (EINVAL);
1494 }
1495 if (raidPtr->Disks[column].status == rf_ds_spared) {
1496 rf_unlock_mutex2(raidPtr->mutex);
1497 return (EINVAL);
1498 }
1499 rf_unlock_mutex2(raidPtr->mutex);
1500
1501 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1502 if (rrcopy == NULL)
1503 return(ENOMEM);
1504
1505 rrcopy->raidPtr = (void *) raidPtr;
1506 rrcopy->col = column;
1507
1508 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1509 rf_ReconstructInPlaceThread,
1510 rrcopy,"raid_reconip");
1511 return(retcode);
1512
1513 case RAIDFRAME_GET_INFO:
1514 if (!raidPtr->valid)
1515 return (ENODEV);
1516 ucfgp = (RF_DeviceConfig_t **) data;
1517 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1518 (RF_DeviceConfig_t *));
1519 if (d_cfg == NULL)
1520 return (ENOMEM);
1521 d_cfg->rows = 1; /* there is only 1 row now */
1522 d_cfg->cols = raidPtr->numCol;
1523 d_cfg->ndevs = raidPtr->numCol;
1524 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1525 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1526 return (ENOMEM);
1527 }
1528 d_cfg->nspares = raidPtr->numSpare;
1529 if (d_cfg->nspares >= RF_MAX_DISKS) {
1530 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1531 return (ENOMEM);
1532 }
1533 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1534 d = 0;
1535 for (j = 0; j < d_cfg->cols; j++) {
1536 d_cfg->devs[d] = raidPtr->Disks[j];
1537 d++;
1538 }
1539 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1540 d_cfg->spares[i] = raidPtr->Disks[j];
1541 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1542 /* XXX: raidctl(8) expects to see this as a used spare */
1543 d_cfg->spares[i].status = rf_ds_used_spare;
1544 }
1545 }
1546 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1547 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1548
1549 return (retcode);
1550
1551 case RAIDFRAME_CHECK_PARITY:
1552 *(int *) data = raidPtr->parity_good;
1553 return (0);
1554
1555 case RAIDFRAME_PARITYMAP_STATUS:
1556 if (rf_paritymap_ineligible(raidPtr))
1557 return EINVAL;
1558 rf_paritymap_status(raidPtr->parity_map,
1559 (struct rf_pmstat *)data);
1560 return 0;
1561
1562 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1563 if (rf_paritymap_ineligible(raidPtr))
1564 return EINVAL;
1565 if (raidPtr->parity_map == NULL)
1566 return ENOENT; /* ??? */
1567 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1568 (struct rf_pmparams *)data, 1))
1569 return EINVAL;
1570 return 0;
1571
1572 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1573 if (rf_paritymap_ineligible(raidPtr))
1574 return EINVAL;
1575 *(int *) data = rf_paritymap_get_disable(raidPtr);
1576 return 0;
1577
1578 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1579 if (rf_paritymap_ineligible(raidPtr))
1580 return EINVAL;
1581 rf_paritymap_set_disable(raidPtr, *(int *)data);
1582 /* XXX should errors be passed up? */
1583 return 0;
1584
1585 case RAIDFRAME_RESET_ACCTOTALS:
1586 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1587 return (0);
1588
1589 case RAIDFRAME_GET_ACCTOTALS:
1590 totals = (RF_AccTotals_t *) data;
1591 *totals = raidPtr->acc_totals;
1592 return (0);
1593
1594 case RAIDFRAME_KEEP_ACCTOTALS:
1595 raidPtr->keep_acc_totals = *(int *)data;
1596 return (0);
1597
1598 case RAIDFRAME_GET_SIZE:
1599 *(int *) data = raidPtr->totalSectors;
1600 return (0);
1601
1602 /* fail a disk & optionally start reconstruction */
1603 case RAIDFRAME_FAIL_DISK:
1604
1605 if (raidPtr->Layout.map->faultsTolerated == 0) {
1606 /* Can't do this on a RAID 0!! */
1607 return(EINVAL);
1608 }
1609
1610 rr = (struct rf_recon_req *) data;
1611 rr->row = 0;
1612 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1613 return (EINVAL);
1614
1615
1616 rf_lock_mutex2(raidPtr->mutex);
1617 if (raidPtr->status == rf_rs_reconstructing) {
1618 /* you can't fail a disk while we're reconstructing! */
1619 /* XXX wrong for RAID6 */
1620 rf_unlock_mutex2(raidPtr->mutex);
1621 return (EINVAL);
1622 }
1623 if ((raidPtr->Disks[rr->col].status ==
1624 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1625 /* some other component has failed. Let's not make
1626 things worse. XXX wrong for RAID6 */
1627 rf_unlock_mutex2(raidPtr->mutex);
1628 return (EINVAL);
1629 }
1630 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1631 /* Can't fail a spared disk! */
1632 rf_unlock_mutex2(raidPtr->mutex);
1633 return (EINVAL);
1634 }
1635 rf_unlock_mutex2(raidPtr->mutex);
1636
1637 /* make a copy of the recon request so that we don't rely on
1638 * the user's buffer */
1639 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1640 if (rrcopy == NULL)
1641 return(ENOMEM);
1642 memcpy(rrcopy, rr, sizeof(*rr));
1643 rrcopy->raidPtr = (void *) raidPtr;
1644
1645 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1646 rf_ReconThread,
1647 rrcopy,"raid_recon");
1648 return (0);
1649
1650 /* invoke a copyback operation after recon on whatever disk
1651 * needs it, if any */
1652 case RAIDFRAME_COPYBACK:
1653
1654 if (raidPtr->Layout.map->faultsTolerated == 0) {
1655 /* This makes no sense on a RAID 0!! */
1656 return(EINVAL);
1657 }
1658
1659 if (raidPtr->copyback_in_progress == 1) {
1660 /* Copyback is already in progress! */
1661 return(EINVAL);
1662 }
1663
1664 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1665 rf_CopybackThread,
1666 raidPtr,"raid_copyback");
1667 return (retcode);
1668
1669 /* return the percentage completion of reconstruction */
1670 case RAIDFRAME_CHECK_RECON_STATUS:
1671 if (raidPtr->Layout.map->faultsTolerated == 0) {
1672 /* This makes no sense on a RAID 0, so tell the
1673 user it's done. */
1674 *(int *) data = 100;
1675 return(0);
1676 }
1677 if (raidPtr->status != rf_rs_reconstructing)
1678 *(int *) data = 100;
1679 else {
1680 if (raidPtr->reconControl->numRUsTotal > 0) {
1681 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1682 } else {
1683 *(int *) data = 0;
1684 }
1685 }
1686 return (0);
1687 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1688 progressInfoPtr = (RF_ProgressInfo_t **) data;
1689 if (raidPtr->status != rf_rs_reconstructing) {
1690 progressInfo.remaining = 0;
1691 progressInfo.completed = 100;
1692 progressInfo.total = 100;
1693 } else {
1694 progressInfo.total =
1695 raidPtr->reconControl->numRUsTotal;
1696 progressInfo.completed =
1697 raidPtr->reconControl->numRUsComplete;
1698 progressInfo.remaining = progressInfo.total -
1699 progressInfo.completed;
1700 }
1701 retcode = copyout(&progressInfo, *progressInfoPtr,
1702 sizeof(RF_ProgressInfo_t));
1703 return (retcode);
1704
1705 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1706 if (raidPtr->Layout.map->faultsTolerated == 0) {
1707 /* This makes no sense on a RAID 0, so tell the
1708 user it's done. */
1709 *(int *) data = 100;
1710 return(0);
1711 }
1712 if (raidPtr->parity_rewrite_in_progress == 1) {
1713 *(int *) data = 100 *
1714 raidPtr->parity_rewrite_stripes_done /
1715 raidPtr->Layout.numStripe;
1716 } else {
1717 *(int *) data = 100;
1718 }
1719 return (0);
1720
1721 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1722 progressInfoPtr = (RF_ProgressInfo_t **) data;
1723 if (raidPtr->parity_rewrite_in_progress == 1) {
1724 progressInfo.total = raidPtr->Layout.numStripe;
1725 progressInfo.completed =
1726 raidPtr->parity_rewrite_stripes_done;
1727 progressInfo.remaining = progressInfo.total -
1728 progressInfo.completed;
1729 } else {
1730 progressInfo.remaining = 0;
1731 progressInfo.completed = 100;
1732 progressInfo.total = 100;
1733 }
1734 retcode = copyout(&progressInfo, *progressInfoPtr,
1735 sizeof(RF_ProgressInfo_t));
1736 return (retcode);
1737
1738 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1739 if (raidPtr->Layout.map->faultsTolerated == 0) {
1740 /* This makes no sense on a RAID 0 */
1741 *(int *) data = 100;
1742 return(0);
1743 }
1744 if (raidPtr->copyback_in_progress == 1) {
1745 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1746 raidPtr->Layout.numStripe;
1747 } else {
1748 *(int *) data = 100;
1749 }
1750 return (0);
1751
1752 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1753 progressInfoPtr = (RF_ProgressInfo_t **) data;
1754 if (raidPtr->copyback_in_progress == 1) {
1755 progressInfo.total = raidPtr->Layout.numStripe;
1756 progressInfo.completed =
1757 raidPtr->copyback_stripes_done;
1758 progressInfo.remaining = progressInfo.total -
1759 progressInfo.completed;
1760 } else {
1761 progressInfo.remaining = 0;
1762 progressInfo.completed = 100;
1763 progressInfo.total = 100;
1764 }
1765 retcode = copyout(&progressInfo, *progressInfoPtr,
1766 sizeof(RF_ProgressInfo_t));
1767 return (retcode);
1768
1769 /* the sparetable daemon calls this to wait for the kernel to
1770 * need a spare table. this ioctl does not return until a
1771 * spare table is needed. XXX -- calling mpsleep here in the
1772 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1773 * -- I should either compute the spare table in the kernel,
1774 * or have a different -- XXX XXX -- interface (a different
1775 * character device) for delivering the table -- XXX */
1776 #if 0
1777 case RAIDFRAME_SPARET_WAIT:
1778 rf_lock_mutex2(rf_sparet_wait_mutex);
1779 while (!rf_sparet_wait_queue)
1780 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1781 waitreq = rf_sparet_wait_queue;
1782 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1783 rf_unlock_mutex2(rf_sparet_wait_mutex);
1784
1785 /* structure assignment */
1786 *((RF_SparetWait_t *) data) = *waitreq;
1787
1788 RF_Free(waitreq, sizeof(*waitreq));
1789 return (0);
1790
1791 /* wakes up a process waiting on SPARET_WAIT and puts an error
1792 * code in it that will cause the dameon to exit */
1793 case RAIDFRAME_ABORT_SPARET_WAIT:
1794 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1795 waitreq->fcol = -1;
1796 rf_lock_mutex2(rf_sparet_wait_mutex);
1797 waitreq->next = rf_sparet_wait_queue;
1798 rf_sparet_wait_queue = waitreq;
1799 rf_broadcast_conf2(rf_sparet_wait_cv);
1800 rf_unlock_mutex2(rf_sparet_wait_mutex);
1801 return (0);
1802
1803 /* used by the spare table daemon to deliver a spare table
1804 * into the kernel */
1805 case RAIDFRAME_SEND_SPARET:
1806
1807 /* install the spare table */
1808 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1809
1810 /* respond to the requestor. the return status of the spare
1811 * table installation is passed in the "fcol" field */
1812 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1813 waitreq->fcol = retcode;
1814 rf_lock_mutex2(rf_sparet_wait_mutex);
1815 waitreq->next = rf_sparet_resp_queue;
1816 rf_sparet_resp_queue = waitreq;
1817 rf_broadcast_cond2(rf_sparet_resp_cv);
1818 rf_unlock_mutex2(rf_sparet_wait_mutex);
1819
1820 return (retcode);
1821 #endif
1822
1823 default:
1824 break; /* fall through to the os-specific code below */
1825
1826 }
1827
1828 if (!raidPtr->valid)
1829 return (EINVAL);
1830
1831 /*
1832 * Add support for "regular" device ioctls here.
1833 */
1834
1835 error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
1836 if (error != EPASSTHROUGH)
1837 return (error);
1838
1839 switch (cmd) {
1840 case DIOCWDINFO:
1841 case DIOCSDINFO:
1842 #ifdef __HAVE_OLD_DISKLABEL
1843 case ODIOCWDINFO:
1844 case ODIOCSDINFO:
1845 #endif
1846 {
1847 struct disklabel *lp;
1848 #ifdef __HAVE_OLD_DISKLABEL
1849 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1850 memset(&newlabel, 0, sizeof newlabel);
1851 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1852 lp = &newlabel;
1853 } else
1854 #endif
1855 lp = (struct disklabel *)data;
1856
1857 if ((error = raidlock(rs)) != 0)
1858 return (error);
1859
1860 rs->sc_flags |= RAIDF_LABELLING;
1861
1862 error = setdisklabel(rs->sc_dkdev.dk_label,
1863 lp, 0, rs->sc_dkdev.dk_cpulabel);
1864 if (error == 0) {
1865 if (cmd == DIOCWDINFO
1866 #ifdef __HAVE_OLD_DISKLABEL
1867 || cmd == ODIOCWDINFO
1868 #endif
1869 )
1870 error = writedisklabel(RAIDLABELDEV(dev),
1871 raidstrategy, rs->sc_dkdev.dk_label,
1872 rs->sc_dkdev.dk_cpulabel);
1873 }
1874 rs->sc_flags &= ~RAIDF_LABELLING;
1875
1876 raidunlock(rs);
1877
1878 if (error)
1879 return (error);
1880 break;
1881 }
1882
1883 case DIOCWLABEL:
1884 if (*(int *) data != 0)
1885 rs->sc_flags |= RAIDF_WLABEL;
1886 else
1887 rs->sc_flags &= ~RAIDF_WLABEL;
1888 break;
1889
1890 case DIOCGDEFLABEL:
1891 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1892 break;
1893
1894 #ifdef __HAVE_OLD_DISKLABEL
1895 case ODIOCGDEFLABEL:
1896 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1897 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1898 return ENOTTY;
1899 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1900 break;
1901 #endif
1902
1903 case DIOCCACHESYNC:
1904 return rf_sync_component_caches(raidPtr);
1905
1906 case DIOCGSTRATEGY:
1907 {
1908 struct disk_strategy *dks = (void *)data;
1909
1910 s = splbio();
1911 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1912 sizeof(dks->dks_name));
1913 splx(s);
1914 dks->dks_paramlen = 0;
1915
1916 return 0;
1917 }
1918
1919 case DIOCSSTRATEGY:
1920 {
1921 struct disk_strategy *dks = (void *)data;
1922 struct bufq_state *new;
1923 struct bufq_state *old;
1924
1925 if (dks->dks_param != NULL) {
1926 return EINVAL;
1927 }
1928 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1929 error = bufq_alloc(&new, dks->dks_name,
1930 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1931 if (error) {
1932 return error;
1933 }
1934 s = splbio();
1935 old = rs->buf_queue;
1936 bufq_move(new, old);
1937 rs->buf_queue = new;
1938 splx(s);
1939 bufq_free(old);
1940
1941 return 0;
1942 }
1943
1944 default:
1945 retcode = ENOTTY;
1946 }
1947 return (retcode);
1948
1949 }
1950
1951
1952 /* raidinit -- complete the rest of the initialization for the
1953 RAIDframe device. */
1954
1955
1956 static void
1957 raidinit(struct raid_softc *rs)
1958 {
1959 cfdata_t cf;
1960 int unit;
1961 RF_Raid_t *raidPtr = &rs->sc_r;
1962
1963 unit = raidPtr->raidid;
1964
1965
1966 /* XXX should check return code first... */
1967 rs->sc_flags |= RAIDF_INITED;
1968
1969 /* XXX doesn't check bounds. */
1970 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1971
1972 /* attach the pseudo device */
1973 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1974 cf->cf_name = raid_cd.cd_name;
1975 cf->cf_atname = raid_cd.cd_name;
1976 cf->cf_unit = unit;
1977 cf->cf_fstate = FSTATE_STAR;
1978
1979 rs->sc_dev = config_attach_pseudo(cf);
1980
1981 if (rs->sc_dev == NULL) {
1982 printf("raid%d: config_attach_pseudo failed\n",
1983 raidPtr->raidid);
1984 rs->sc_flags &= ~RAIDF_INITED;
1985 free(cf, M_RAIDFRAME);
1986 return;
1987 }
1988
1989 /* disk_attach actually creates space for the CPU disklabel, among
1990 * other things, so it's critical to call this *BEFORE* we try putzing
1991 * with disklabels. */
1992
1993 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1994 disk_attach(&rs->sc_dkdev);
1995
1996 /* XXX There may be a weird interaction here between this, and
1997 * protectedSectors, as used in RAIDframe. */
1998
1999 rs->sc_size = raidPtr->totalSectors;
2000
2001 rf_set_geometry(rs, raidPtr);
2002
2003 dkwedge_discover(&rs->sc_dkdev);
2004
2005 }
2006 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
2007 /* wake up the daemon & tell it to get us a spare table
2008 * XXX
2009 * the entries in the queues should be tagged with the raidPtr
2010 * so that in the extremely rare case that two recons happen at once,
2011 * we know for which device were requesting a spare table
2012 * XXX
2013 *
2014 * XXX This code is not currently used. GO
2015 */
2016 int
2017 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2018 {
2019 int retcode;
2020
2021 rf_lock_mutex2(rf_sparet_wait_mutex);
2022 req->next = rf_sparet_wait_queue;
2023 rf_sparet_wait_queue = req;
2024 rf_broadcast_cond2(rf_sparet_wait_cv);
2025
2026 /* mpsleep unlocks the mutex */
2027 while (!rf_sparet_resp_queue) {
2028 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2029 }
2030 req = rf_sparet_resp_queue;
2031 rf_sparet_resp_queue = req->next;
2032 rf_unlock_mutex2(rf_sparet_wait_mutex);
2033
2034 retcode = req->fcol;
2035 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2036 * alloc'd */
2037 return (retcode);
2038 }
2039 #endif
2040
2041 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2042 * bp & passes it down.
2043 * any calls originating in the kernel must use non-blocking I/O
2044 * do some extra sanity checking to return "appropriate" error values for
2045 * certain conditions (to make some standard utilities work)
2046 *
2047 * Formerly known as: rf_DoAccessKernel
2048 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* rf_update_component_labels() must be called without
		 * raidPtr->mutex held, so drop it across the call. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Lock invariant: raidPtr->mutex is held each time the loop
	 * condition is evaluated, released inside the body, and
	 * re-taken before every "continue" and before falling out
	 * at the bottom of the body. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* NB: returns with the mutex NOT held (it was
			 * dropped just above). */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to this set's sector size. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the set; the "sum <"
		 * comparisons also catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* Re-take the mutex for the loop condition. */
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* Re-take the mutex for the loop condition. */
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this I/O; it is returned when
		 * the access completes. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess failed synchronously: fail the buf
			 * here; on success completion happens via the
			 * non-blocking I/O callback path. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		/* Re-take the mutex for the loop condition. */
		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2166
2167
2168
2169
2170 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2171
2172 int
2173 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2174 {
2175 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2176 struct buf *bp;
2177
2178 req->queue = queue;
2179 bp = req->bp;
2180
2181 switch (req->type) {
2182 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2183 /* XXX need to do something extra here.. */
2184 /* I'm leaving this in, as I've never actually seen it used,
2185 * and I'd like folks to report it... GO */
2186 printf(("WAKEUP CALLED\n"));
2187 queue->numOutstanding++;
2188
2189 bp->b_flags = 0;
2190 bp->b_private = req;
2191
2192 KernelWakeupFunc(bp);
2193 break;
2194
2195 case RF_IO_TYPE_READ:
2196 case RF_IO_TYPE_WRITE:
2197 #if RF_ACC_TRACE > 0
2198 if (req->tracerec) {
2199 RF_ETIMER_START(req->tracerec->timer);
2200 }
2201 #endif
2202 InitBP(bp, queue->rf_cinfo->ci_vp,
2203 op, queue->rf_cinfo->ci_dev,
2204 req->sectorOffset, req->numSector,
2205 req->buf, KernelWakeupFunc, (void *) req,
2206 queue->raidPtr->logBytesPerSector, req->b_proc);
2207
2208 if (rf_debugKernelAccess) {
2209 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2210 (long) bp->b_blkno));
2211 }
2212 queue->numOutstanding++;
2213 queue->last_deq_sector = req->sectorOffset;
2214 /* acc wouldn't have been let in if there were any pending
2215 * reqs at any other priority */
2216 queue->curPriority = req->priority;
2217
2218 db1_printf(("Going for %c to unit %d col %d\n",
2219 req->type, queue->raidPtr->raidid,
2220 queue->col));
2221 db1_printf(("sector %d count %d (%d bytes) %d\n",
2222 (int) req->sectorOffset, (int) req->numSector,
2223 (int) (req->numSector <<
2224 queue->raidPtr->logBytesPerSector),
2225 (int) queue->raidPtr->logBytesPerSector));
2226
2227 /*
2228 * XXX: drop lock here since this can block at
2229 * least with backing SCSI devices. Retake it
2230 * to minimize fuss with calling interfaces.
2231 */
2232
2233 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2234 bdev_strategy(bp);
2235 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2236 break;
2237
2238 default:
2239 panic("bad req->type in rf_DispatchKernelIO");
2240 }
2241 db1_printf(("Exiting from DispatchKernelIO\n"));
2242
2243 return (0);
2244 }
2245 /* this is the callback function associated with a I/O invoked from
2246 kernel code.
2247 */
/*
 * biodone callback for component I/O issued by rf_DispatchKernelIO().
 * Runs when the underlying device completes the buf.  Records any error,
 * possibly fails the component, and hands the request to the raidio
 * thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by InitBP()/rf_DispatchKernelIO(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* All iodone-list manipulation happens under iodone_lock. */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2314
2315
2316 /*
2317 * initialize a buf structure for doing an I/O in the kernel.
2318 */
/*
 * initialize a buf structure for doing an I/O in the kernel.
 * The buf is aimed at device 'dev' starting at sector 'startSect'
 * (in units of 2^logBytesPerSector bytes), transferring 'numSect'
 * sectors to/from 'bf'.  'cbFunc'/'cbArg' become the biodone callback.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* Convert the sector address to DEV_BSIZE units for b_blkno. */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* A zero-length transfer here indicates a caller bug. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2343
2344 static void
2345 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2346 struct disklabel *lp)
2347 {
2348 memset(lp, 0, sizeof(*lp));
2349
2350 /* fabricate a label... */
2351 if (raidPtr->totalSectors > UINT32_MAX)
2352 lp->d_secperunit = UINT32_MAX;
2353 else
2354 lp->d_secperunit = raidPtr->totalSectors;
2355 lp->d_secsize = raidPtr->bytesPerSector;
2356 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2357 lp->d_ntracks = 4 * raidPtr->numCol;
2358 lp->d_ncylinders = raidPtr->totalSectors /
2359 (lp->d_nsectors * lp->d_ntracks);
2360 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2361
2362 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2363 lp->d_type = DKTYPE_RAID;
2364 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2365 lp->d_rpm = 3600;
2366 lp->d_interleave = 1;
2367 lp->d_flags = 0;
2368
2369 lp->d_partitions[RAW_PART].p_offset = 0;
2370 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
2371 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2372 lp->d_npartitions = RAW_PART + 1;
2373
2374 lp->d_magic = DISKMAGIC;
2375 lp->d_magic2 = DISKMAGIC;
2376 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2377
2378 }
2379 /*
2380 * Read the disklabel from the raid device. If one is not present, fake one
2381 * up.
2382 */
/*
 * Read the disklabel for the given raid device into the softc's
 * dk_label, faking one up via raidmakedisklabel() if none is found,
 * and sanity-check a found label against the actual raid size.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Start from a fabricated label; readdisklabel() may replace it. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX, so compare
		   with != only below that limit. */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ju)\n",
				       unit, rs->sc_xname, 'a' + i,
				       (uintmax_t)rs->sc_size);
		}

	}

}
2445 /*
2446 * Take care of things one might want to take care of in the event
2447 * that a disklabel isn't present.
2448 */
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present: adjust the fabricated label already
 * in rs->sc_dkdev.dk_label and re-checksum it.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* Re-checksum after the edits above. */
	lp->d_checksum = dkcksum(lp);
}
2466 /*
2467 * Wait interruptibly for an exclusive lock.
2468 *
2469 * XXX
2470 * Several drivers do this; it should be abstracted and made MP-safe.
2471 * (Hmm... where have we seen this warning before :-> GO )
2472 */
2473 static int
2474 raidlock(struct raid_softc *rs)
2475 {
2476 int error;
2477
2478 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2479 rs->sc_flags |= RAIDF_WANTED;
2480 if ((error =
2481 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2482 return (error);
2483 }
2484 rs->sc_flags |= RAIDF_LOCKED;
2485 return (0);
2486 }
2487 /*
2488 * Unlock and wake up any waiters.
2489 */
2490 static void
2491 raidunlock(struct raid_softc *rs)
2492 {
2493
2494 rs->sc_flags &= ~RAIDF_LOCKED;
2495 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2496 rs->sc_flags &= ~RAIDF_WANTED;
2497 wakeup(rs);
2498 }
2499 }
2500
2501
2502 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2503 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2504 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2505
2506 static daddr_t
2507 rf_component_info_offset(void)
2508 {
2509
2510 return RF_COMPONENT_INFO_OFFSET;
2511 }
2512
2513 static daddr_t
2514 rf_component_info_size(unsigned secsize)
2515 {
2516 daddr_t info_size;
2517
2518 KASSERT(secsize);
2519 if (secsize > RF_COMPONENT_INFO_SIZE)
2520 info_size = secsize;
2521 else
2522 info_size = RF_COMPONENT_INFO_SIZE;
2523
2524 return info_size;
2525 }
2526
2527 static daddr_t
2528 rf_parity_map_offset(RF_Raid_t *raidPtr)
2529 {
2530 daddr_t map_offset;
2531
2532 KASSERT(raidPtr->bytesPerSector);
2533 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2534 map_offset = raidPtr->bytesPerSector;
2535 else
2536 map_offset = RF_COMPONENT_INFO_SIZE;
2537 map_offset += rf_component_info_offset();
2538
2539 return map_offset;
2540 }
2541
2542 static daddr_t
2543 rf_parity_map_size(RF_Raid_t *raidPtr)
2544 {
2545 daddr_t map_size;
2546
2547 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2548 map_size = raidPtr->bytesPerSector;
2549 else
2550 map_size = RF_PARITY_MAP_SIZE;
2551
2552 return map_size;
2553 }
2554
2555 int
2556 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2557 {
2558 RF_ComponentLabel_t *clabel;
2559
2560 clabel = raidget_component_label(raidPtr, col);
2561 clabel->clean = RF_RAID_CLEAN;
2562 raidflush_component_label(raidPtr, col);
2563 return(0);
2564 }
2565
2566
2567 int
2568 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2569 {
2570 RF_ComponentLabel_t *clabel;
2571
2572 clabel = raidget_component_label(raidPtr, col);
2573 clabel->clean = RF_RAID_DIRTY;
2574 raidflush_component_label(raidPtr, col);
2575 return(0);
2576 }
2577
/*
 * Read the on-disk component label for column 'col' into the in-core
 * copy at raid_cinfo[col].ci_label.  Returns the raidread_component_label()
 * error code.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2587
/*
 * Return a pointer to the in-core component label for column 'col'.
 * Callers modify it in place and then call raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2593
/*
 * Write the in-core component label for column 'col' out to the
 * component, stamping it with the current mod_counter first.
 * Returns the raidwrite_component_label() error code.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* Keep the parity map's mod counter in sync with the label's. */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2608
2609
/*
 * Read a component label from device 'dev' into *clabel.
 * Thin wrapper around raidread_component_area() using the standard
 * component info offset/size for the given sector size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2619
2620 /* ARGSUSED */
2621 static int
2622 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2623 size_t msize, daddr_t offset, daddr_t dsize)
2624 {
2625 struct buf *bp;
2626 const struct bdevsw *bdev;
2627 int error;
2628
2629 /* XXX should probably ensure that we don't try to do this if
2630 someone has changed rf_protected_sectors. */
2631
2632 if (b_vp == NULL) {
2633 /* For whatever reason, this component is not valid.
2634 Don't try to read a component label from it. */
2635 return(EINVAL);
2636 }
2637
2638 /* get a block of the appropriate size... */
2639 bp = geteblk((int)dsize);
2640 bp->b_dev = dev;
2641
2642 /* get our ducks in a row for the read */
2643 bp->b_blkno = offset / DEV_BSIZE;
2644 bp->b_bcount = dsize;
2645 bp->b_flags |= B_READ;
2646 bp->b_resid = dsize;
2647
2648 bdev = bdevsw_lookup(bp->b_dev);
2649 if (bdev == NULL)
2650 return (ENXIO);
2651 (*bdev->d_strategy)(bp);
2652
2653 error = biowait(bp);
2654
2655 if (!error) {
2656 memcpy(data, bp->b_data, msize);
2657 }
2658
2659 brelse(bp, 0);
2660 return(error);
2661 }
2662
2663
/*
 * Write a component label *clabel to device 'dev'.
 * Thin wrapper around raidwrite_component_area() using the standard
 * component info offset/size for the given sector size; synchronous.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2673
2674 /* ARGSUSED */
2675 static int
2676 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2677 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2678 {
2679 struct buf *bp;
2680 const struct bdevsw *bdev;
2681 int error;
2682
2683 /* get a block of the appropriate size... */
2684 bp = geteblk((int)dsize);
2685 bp->b_dev = dev;
2686
2687 /* get our ducks in a row for the write */
2688 bp->b_blkno = offset / DEV_BSIZE;
2689 bp->b_bcount = dsize;
2690 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2691 bp->b_resid = dsize;
2692
2693 memset(bp->b_data, 0, dsize);
2694 memcpy(bp->b_data, data, msize);
2695
2696 bdev = bdevsw_lookup(bp->b_dev);
2697 if (bdev == NULL)
2698 return (ENXIO);
2699 (*bdev->d_strategy)(bp);
2700 if (asyncp)
2701 return 0;
2702 error = biowait(bp);
2703 brelse(bp, 0);
2704 if (error) {
2705 #if 1
2706 printf("Failed to write RAID component info!\n");
2707 #endif
2708 }
2709
2710 return(error);
2711 }
2712
/*
 * Write the on-disk parity map *map to every live component of the set,
 * synchronously.  Dead components are skipped.
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2730
/*
 * Read the parity map from every live component and merge the copies
 * into *map (union of dirty regions via rf_paritymap_merge()).
 * NOTE(review): if every component is dead, *map is left untouched —
 * callers presumably guarantee at least one live disk; verify.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* First live copy seeds the result. */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2755
/*
 * Bump the mod counter and mark the component labels of all live
 * components (and in-use spares) dirty on disk.  Called when the set
 * goes into active use so an unclean shutdown can be detected.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2815
2816
/*
 * Refresh the component labels of all optimal components and in-use
 * spares: bump the mod counter, record configuration, and (when 'final'
 * is RF_FINAL_COMPONENT_UPDATE and parity is known good) mark them clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2891
2892 void
2893 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2894 {
2895
2896 if (vp != NULL) {
2897 if (auto_configured == 1) {
2898 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2899 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2900 vput(vp);
2901
2902 } else {
2903 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2904 }
2905 }
2906 }
2907
2908
2909 void
2910 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2911 {
2912 int r,c;
2913 struct vnode *vp;
2914 int acd;
2915
2916
2917 /* We take this opportunity to close the vnodes like we should.. */
2918
2919 for (c = 0; c < raidPtr->numCol; c++) {
2920 vp = raidPtr->raid_cinfo[c].ci_vp;
2921 acd = raidPtr->Disks[c].auto_configured;
2922 rf_close_component(raidPtr, vp, acd);
2923 raidPtr->raid_cinfo[c].ci_vp = NULL;
2924 raidPtr->Disks[c].auto_configured = 0;
2925 }
2926
2927 for (r = 0; r < raidPtr->numSpare; r++) {
2928 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2929 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2930 rf_close_component(raidPtr, vp, acd);
2931 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2932 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2933 }
2934 }
2935
2936
/*
 * Kernel-thread body: fail the requested component (optionally starting
 * reconstruction to a spare), then exit.  Frees 'req'; never returns.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2958
/*
 * Kernel-thread body: rewrite all parity on the set.  On success marks
 * parity good; wakes any shutdown waiter; never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2989
2990
/*
 * Kernel-thread body: copy reconstructed data from spares back to
 * replaced components, then exit.  Never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int     s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3005
3006
/*
 * Kernel-thread body: reconstruct the given column in place (onto the
 * same device), then exit.  Frees 'req'; never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
3024
/*
 * Probe one candidate device/partition for a RAIDframe component label.
 * If a reasonable label is found, prepend a new RF_AutoConfig_t to
 * ac_list (taking ownership of vp); otherwise the vnode is closed and
 * released.  Returns the (possibly updated) list head, or NULL after
 * freeing the whole list on allocation failure.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3082
/*
 * Scan every disk-class device in the system for RAIDframe components:
 * wedges of type DKW_PTYPE_RAIDFRAME, disklabel partitions of type
 * FS_RAID, and (failing both) the raw partition.  Returns a linked
 * list of RF_AutoConfig_t candidates (may be NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges (dk) address the whole device; others use the
		   raw partition of the unit. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* Wedge path: check the wedge's partition type
			   instead of reading a disklabel. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3264
3265
/*
 * Return 1 iff *clabel passes basic sanity checks (known version,
 * valid clean flag, consistent row/column geometry, positive block
 * size and count).  As a side effect, scrubs stale high-word garbage
 * via rf_fix_old_label_size() when numsecs is nonzero.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}
3297
3298
3299 /*
3300 * For reasons yet unknown, some old component labels have garbage in
3301 * the newer numBlocksHi region, and this causes lossage. Since those
3302 * disks will also have numsecs set to less than 32 bits of sectors,
3303 * we can determine when this corruption has occurred, and fix it.
3304 *
3305 * The exact same problem, with the same unknown reason, happens to
3306 * the partitionSizeHi member as well.
3307 */
3308 static void
3309 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3310 {
3311
3312 if (numsecs < ((uint64_t)1 << 32)) {
3313 if (clabel->numBlocksHi) {
3314 printf("WARNING: total sectors < 32 bits, yet "
3315 "numBlocksHi set\n"
3316 "WARNING: resetting numBlocksHi to zero.\n");
3317 clabel->numBlocksHi = 0;
3318 }
3319
3320 if (clabel->partitionSizeHi) {
3321 printf("WARNING: total sectors < 32 bits, yet "
3322 "partitionSizeHi set\n"
3323 "WARNING: resetting partitionSizeHi to zero.\n");
3324 clabel->partitionSizeHi = 0;
3325 }
3326 }
3327 }
3328
3329
3330 #ifdef DEBUG
/*
 * Debug helper: dump the contents of a component label to the console.
 * Only compiled when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* root_partition is masked to index the 4-entry rp[] table. */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3362 #endif
3363
3364 RF_ConfigSet_t *
3365 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3366 {
3367 RF_AutoConfig_t *ac;
3368 RF_ConfigSet_t *config_sets;
3369 RF_ConfigSet_t *cset;
3370 RF_AutoConfig_t *ac_next;
3371
3372
3373 config_sets = NULL;
3374
3375 /* Go through the AutoConfig list, and figure out which components
3376 belong to what sets. */
3377 ac = ac_list;
3378 while(ac!=NULL) {
3379 /* we're going to putz with ac->next, so save it here
3380 for use at the end of the loop */
3381 ac_next = ac->next;
3382
3383 if (config_sets == NULL) {
3384 /* will need at least this one... */
3385 config_sets = (RF_ConfigSet_t *)
3386 malloc(sizeof(RF_ConfigSet_t),
3387 M_RAIDFRAME, M_NOWAIT);
3388 if (config_sets == NULL) {
3389 panic("rf_create_auto_sets: No memory!");
3390 }
3391 /* this one is easy :) */
3392 config_sets->ac = ac;
3393 config_sets->next = NULL;
3394 config_sets->rootable = 0;
3395 ac->next = NULL;
3396 } else {
3397 /* which set does this component fit into? */
3398 cset = config_sets;
3399 while(cset!=NULL) {
3400 if (rf_does_it_fit(cset, ac)) {
3401 /* looks like it matches... */
3402 ac->next = cset->ac;
3403 cset->ac = ac;
3404 break;
3405 }
3406 cset = cset->next;
3407 }
3408 if (cset==NULL) {
3409 /* didn't find a match above... new set..*/
3410 cset = (RF_ConfigSet_t *)
3411 malloc(sizeof(RF_ConfigSet_t),
3412 M_RAIDFRAME, M_NOWAIT);
3413 if (cset == NULL) {
3414 panic("rf_create_auto_sets: No memory!");
3415 }
3416 cset->ac = ac;
3417 ac->next = NULL;
3418 cset->next = config_sets;
3419 cset->rootable = 0;
3420 config_sets = cset;
3421 }
3422 }
3423 ac = ac_next;
3424 }
3425
3426
3427 return(config_sets);
3428 }
3429
3430 static int
3431 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3432 {
3433 RF_ComponentLabel_t *clabel1, *clabel2;
3434
3435 /* If this one matches the *first* one in the set, that's good
3436 enough, since the other members of the set would have been
3437 through here too... */
3438 /* note that we are not checking partitionSize here..
3439
3440 Note that we are also not checking the mod_counters here.
3441 If everything else matches except the mod_counter, that's
3442 good enough for this test. We will deal with the mod_counters
3443 a little later in the autoconfiguration process.
3444
3445 (clabel1->mod_counter == clabel2->mod_counter) &&
3446
3447 The reason we don't check for this is that failed disks
3448 will have lower modification counts. If those disks are
3449 not added to the set they used to belong to, then they will
3450 form their own set, which may result in 2 different sets,
3451 for example, competing to be configured at raid0, and
3452 perhaps competing to be the root filesystem set. If the
3453 wrong ones get configured, or both attempt to become /,
3454 weird behaviour and or serious lossage will occur. Thus we
3455 need to bring them into the fold here, and kick them out at
3456 a later point.
3457
3458 */
3459
3460 clabel1 = cset->ac->clabel;
3461 clabel2 = ac->clabel;
3462 if ((clabel1->version == clabel2->version) &&
3463 (clabel1->serial_number == clabel2->serial_number) &&
3464 (clabel1->num_rows == clabel2->num_rows) &&
3465 (clabel1->num_columns == clabel2->num_columns) &&
3466 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3467 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3468 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3469 (clabel1->parityConfig == clabel2->parityConfig) &&
3470 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3471 (clabel1->blockSize == clabel2->blockSize) &&
3472 rf_component_label_numblocks(clabel1) ==
3473 rf_component_label_numblocks(clabel2) &&
3474 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3475 (clabel1->root_partition == clabel2->root_partition) &&
3476 (clabel1->last_unit == clabel2->last_unit) &&
3477 (clabel1->config_order == clabel2->config_order)) {
3478 /* if it get's here, it almost *has* to be a match */
3479 } else {
3480 /* it's not consistent with somebody in the set..
3481 punt */
3482 return(0);
3483 }
3484 /* all was fine.. it must fit... */
3485 return(1);
3486 }
3487
/*
 * Decide whether a configuration set has enough live components (at
 * the newest mod_counter found in the set) to be configured.
 * Returns 1 if the set is viable, 0 if too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum over all members.  The mod_counter_found flag (rather
	   than starting from 0) handles the first member specially so its
	   value is taken regardless of sign. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a member with that column number and
	   an up-to-date mod_counter; stale/failed members don't count. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a mirror
						   pair are gone: the set
						   cannot be configured */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd (second) component of a
			   mirror pair without bailing out above.. reset
			   the even_pair_failed flag for the next pair,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3590
3591 void
3592 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3593 RF_Raid_t *raidPtr)
3594 {
3595 RF_ComponentLabel_t *clabel;
3596 int i;
3597
3598 clabel = ac->clabel;
3599
3600 /* 1. Fill in the common stuff */
3601 config->numRow = clabel->num_rows = 1;
3602 config->numCol = clabel->num_columns;
3603 config->numSpare = 0; /* XXX should this be set here? */
3604 config->sectPerSU = clabel->sectPerSU;
3605 config->SUsPerPU = clabel->SUsPerPU;
3606 config->SUsPerRU = clabel->SUsPerRU;
3607 config->parityConfig = clabel->parityConfig;
3608 /* XXX... */
3609 strcpy(config->diskQueueType,"fifo");
3610 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3611 config->layoutSpecificSize = 0; /* XXX ?? */
3612
3613 while(ac!=NULL) {
3614 /* row/col values will be in range due to the checks
3615 in reasonable_label() */
3616 strcpy(config->devnames[0][ac->clabel->column],
3617 ac->devname);
3618 ac = ac->next;
3619 }
3620
3621 for(i=0;i<RF_MAXDBGV;i++) {
3622 config->debugVars[i][0] = 0;
3623 }
3624 }
3625
3626 int
3627 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3628 {
3629 RF_ComponentLabel_t *clabel;
3630 int column;
3631 int sparecol;
3632
3633 raidPtr->autoconfigure = new_value;
3634
3635 for(column=0; column<raidPtr->numCol; column++) {
3636 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3637 clabel = raidget_component_label(raidPtr, column);
3638 clabel->autoconfigure = new_value;
3639 raidflush_component_label(raidPtr, column);
3640 }
3641 }
3642 for(column = 0; column < raidPtr->numSpare ; column++) {
3643 sparecol = raidPtr->numCol + column;
3644 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3645 clabel = raidget_component_label(raidPtr, sparecol);
3646 clabel->autoconfigure = new_value;
3647 raidflush_component_label(raidPtr, sparecol);
3648 }
3649 }
3650 return(new_value);
3651 }
3652
3653 int
3654 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3655 {
3656 RF_ComponentLabel_t *clabel;
3657 int column;
3658 int sparecol;
3659
3660 raidPtr->root_partition = new_value;
3661 for(column=0; column<raidPtr->numCol; column++) {
3662 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3663 clabel = raidget_component_label(raidPtr, column);
3664 clabel->root_partition = new_value;
3665 raidflush_component_label(raidPtr, column);
3666 }
3667 }
3668 for(column = 0; column < raidPtr->numSpare ; column++) {
3669 sparecol = raidPtr->numCol + column;
3670 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3671 clabel = raidget_component_label(raidPtr, sparecol);
3672 clabel->root_partition = new_value;
3673 raidflush_component_label(raidPtr, sparecol);
3674 }
3675 }
3676 return(new_value);
3677 }
3678
3679 void
3680 rf_release_all_vps(RF_ConfigSet_t *cset)
3681 {
3682 RF_AutoConfig_t *ac;
3683
3684 ac = cset->ac;
3685 while(ac!=NULL) {
3686 /* Close the vp, and give it back */
3687 if (ac->vp) {
3688 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3689 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3690 vput(ac->vp);
3691 ac->vp = NULL;
3692 }
3693 ac = ac->next;
3694 }
3695 }
3696
3697
3698 void
3699 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3700 {
3701 RF_AutoConfig_t *ac;
3702 RF_AutoConfig_t *next_ac;
3703
3704 ac = cset->ac;
3705 while(ac!=NULL) {
3706 next_ac = ac->next;
3707 /* nuke the label */
3708 free(ac->clabel, M_RAIDFRAME);
3709 /* cleanup the config structure */
3710 free(ac, M_RAIDFRAME);
3711 /* "next.." */
3712 ac = next_ac;
3713 }
3714 /* and, finally, nuke the config set */
3715 free(cset, M_RAIDFRAME);
3716 }
3717
3718
/*
 * Initialize a component label from the current state of the RAID set.
 * Only set-wide fields are filled in here; per-component fields such as
 * column and partitionSize are not touched by this function.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* rows are always 1 in this (post row-removal) RAIDframe */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the label's low/high words */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3751
/*
 * Auto-configure a single configuration set: build an RF_Config_t from
 * the components' labels, pick a unit number, and configure the RAID
 * set.  Returns the configured softc on success, NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the label and walk upward until a
	   softc that is not already configured (valid == 0) is found.
	   NOTE(review): this assumes raidget() never returns NULL --
	   presumably it allocates a softc on demand; verify at caller. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: release the softc we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup: the config structure is no longer needed either way */
	free(config, M_RAIDFRAME);
	return sc;
}
3825
3826 void
3827 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3828 {
3829 struct buf *bp;
3830 struct raid_softc *rs;
3831
3832 bp = (struct buf *)desc->bp;
3833 rs = desc->raidPtr->softc;
3834 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3835 (bp->b_flags & B_READ));
3836 }
3837
/*
 * Initialize a pool(9) of fixed-size items for RAIDframe use.
 * The pool is pre-allocated with xmin items, keeps at least xmin
 * around (low water mark) and caps idle items at xmax (high water
 * mark).
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* prime before setting the low water mark so the items exist
	   up front */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3847
3848 /*
3849 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3850 * if there is IO pending and if that IO could possibly be done for a
3851 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3852 * otherwise.
3853 *
3854 */
3855
3856 int
3857 rf_buf_queue_check(RF_Raid_t *raidPtr)
3858 {
3859 struct raid_softc *rs = raidPtr->softc;
3860 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3861 /* there is work to do */
3862 return 0;
3863 }
3864 /* default is nothing to do */
3865 return 1;
3866 }
3867
3868 int
3869 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3870 {
3871 uint64_t numsecs;
3872 unsigned secsize;
3873 int error;
3874
3875 error = getdisksize(vp, &numsecs, &secsize);
3876 if (error == 0) {
3877 diskPtr->blockSize = secsize;
3878 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3879 diskPtr->partitionSize = numsecs;
3880 return 0;
3881 }
3882 return error;
3883 }
3884
/*
 * autoconf(9) match function: this pseudo-device matches
 * unconditionally.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3890
/*
 * autoconf(9) attach function: no work is done at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3896
3897
3898 static int
3899 raid_detach(device_t self, int flags)
3900 {
3901 int error;
3902 struct raid_softc *rs = raidget(device_unit(self));
3903
3904 if (rs == NULL)
3905 return ENXIO;
3906
3907 if ((error = raidlock(rs)) != 0)
3908 return (error);
3909
3910 error = raid_detach_unlocked(rs);
3911
3912 raidunlock(rs);
3913
3914 /* XXXkd: raidput(rs) ??? */
3915
3916 return error;
3917 }
3918
/*
 * Publish a disk geometry for the RAID set via disk_set_info().
 * A RAID set has no physical geometry, so the values are synthetic:
 * sectors-per-track is the data size of a stripe, and the track count
 * (4 * numCol) appears to be an arbitrary fiction -- NOTE(review):
 * confirm nothing downstream depends on these exact values.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3933
3934 /*
3935 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3936 * We end up returning whatever error was returned by the first cache flush
3937 * that fails.
3938 */
3939
3940 int
3941 rf_sync_component_caches(RF_Raid_t *raidPtr)
3942 {
3943 int c, sparecol;
3944 int e,error;
3945 int force = 1;
3946
3947 error = 0;
3948 for (c = 0; c < raidPtr->numCol; c++) {
3949 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3950 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3951 &force, FWRITE, NOCRED);
3952 if (e) {
3953 if (e != ENODEV)
3954 printf("raid%d: cache flush to component %s failed.\n",
3955 raidPtr->raidid, raidPtr->Disks[c].devname);
3956 if (error == 0) {
3957 error = e;
3958 }
3959 }
3960 }
3961 }
3962
3963 for( c = 0; c < raidPtr->numSpare ; c++) {
3964 sparecol = raidPtr->numCol + c;
3965 /* Need to ensure that the reconstruct actually completed! */
3966 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3967 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3968 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3969 if (e) {
3970 if (e != ENODEV)
3971 printf("raid%d: cache flush to component %s failed.\n",
3972 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3973 if (error == 0) {
3974 error = e;
3975 }
3976 }
3977 }
3978 }
3979 return error;
3980 }
3981