/*	$NetBSD: rf_netbsdkintf.c,v 1.345 2016/04/27 02:47:39 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.345 2016/04/27 02:47:39 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_raid_autoconfig.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef COMPAT_50
#include "rf_compat50.h"
#endif

#include "ioconf.h"

#ifdef DEBUG
int	rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

struct raid_softc {
	struct dk_softc sc_dksc;
	int	sc_unit;
	int	sc_flags;	/* flags */
	int	sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char	sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;
	LIST_ENTRY(raid_softc) sc_link;
};
/* sc_flags */
#define RAIDF_INITED		0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
#define RAIDF_DETACH		0x04	/* detach after final close */
#define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED		0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */

#define	raidunit(x)	DISKUNIT(x)
#define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

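/*
 * Allocate and initialize the software state for a new RAID unit.
 * The caller is expected to link the result onto the global list.
 */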
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

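/*
 * Look up the softc for the given unit on the global list; if it is
 * not found and 'create' is set, allocate and enlist a fresh one.
 * Returns NULL when the unit is absent (or the unit number is bogus).
 */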
static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

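/*
 * Unlink a softc from the global list and free it.
 */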
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occur
	 * as part of the module initialization.
	 */
}

int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. Find the boot device first, if needed, so we can use it later.
	 * This needs to be done before we autoconfigure any RAID sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later.
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

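/*
 * Check whether the device we booted from is a component of the given
 * RAID set.  Component names are compared past the "/dev/" prefix, and
 * wedges (dkN) are first mapped back to their parent device name.
 */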
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	dksc = &rsc->sc_dksc;

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d\n", __func__, booted_device,
		    rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);

}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set.
	   Mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}

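/*
 * Poke the RAIDframe engine: signal iodone_cv so that queued work
 * gets picked up and further I/O can be scheduled.
 */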
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

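/*
 * dk(9) start routine: hand one buffer to the RAIDframe engine.
 * When no openings are available, raiddoaccess() returns EAGAIN so
 * that the dk layer can retry the buffer later.
 */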
static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

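/*
 * Completion path: report the finished buffer to dk(9), return the
 * opening we consumed, and kick the engine to schedule more I/O.
 */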
void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr, *componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n", unit);
			return (EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

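		/*
		 * Illustrative sketch of the userland side (roughly what
		 * raidctl(8) does; the fields shown being filled in are
		 * examples, not a complete list):
		 *
		 *	RF_Config_t cfg, *cfgp = &cfg;
		 *	memset(&cfg, 0, sizeof(cfg));
		 *	... fill in numCol, devnames, parityConfig, ...
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
		 *
		 * i.e. the ioctl argument carries a pointer to the
		 * user-space RF_Config_t, hence the double indirection
		 * below.
		 */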
		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else. */

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return (EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for (column = 0; column < raidPtr->numCol; column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return (0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return (EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread,
		    raidPtr, "raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return (retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return (EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return (EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return (EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d: Col: %d   Too many failures.\n",
			    raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d: Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return (ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconstructInPlaceThread,
		    rrcopy, "raid_reconip");
		return (retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
		    (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return (EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		    rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return (ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconThread,
		    rrcopy, "raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return (EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return (EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread,
		    raidPtr, "raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return (0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
			    raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
			    raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return (0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return (0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_SET_LAST_UNIT:
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = dk_ioctl(dksc, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	dk_start(dksc, NULL);
}

1935 static int
1936 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1937 {
1938 RF_SectorCount_t num_blocks, pb, sum;
1939 RF_RaidAddr_t raid_addr;
1940 daddr_t blocknum;
1941 int do_async;
1942 int rc;
1943
1944 rf_lock_mutex2(raidPtr->mutex);
1945 if (raidPtr->openings == 0) {
1946 rf_unlock_mutex2(raidPtr->mutex);
1947 return EAGAIN;
1948 }
1949 rf_unlock_mutex2(raidPtr->mutex);
1950
1951 blocknum = bp->b_rawblkno;
1952
1953 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1954 (int) blocknum));
1955
1956 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1957 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1958
1959 /* *THIS* is where we adjust what block we're going to...
1960 * but DO NOT TOUCH bp->b_blkno!!! */
1961 raid_addr = blocknum;
1962
1963 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1964 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1965 sum = raid_addr + num_blocks + pb;
1966 	if (rf_debugKernelAccess) {
1967 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1968 (int) raid_addr, (int) sum, (int) num_blocks,
1969 (int) pb, (int) bp->b_resid));
1970 }
1971 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1972 || (sum < num_blocks) || (sum < pb)) {
1973 rc = ENOSPC;
1974 goto done;
1975 }
1976 /*
1977 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1978 */
1979
1980 if (bp->b_bcount & raidPtr->sectorMask) {
1981 rc = ENOSPC;
1982 goto done;
1983 }
1984 db1_printf(("Calling DoAccess..\n"));
1985
1986
1987 rf_lock_mutex2(raidPtr->mutex);
1988 raidPtr->openings--;
1989 rf_unlock_mutex2(raidPtr->mutex);
1990
1991 /*
1992 * Everything is async.
1993 */
1994 do_async = 1;
1995
1996 /* don't ever condition on bp->b_flags & B_WRITE.
1997 * always condition on B_READ instead */
1998
1999 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2000 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2001 do_async, raid_addr, num_blocks,
2002 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2003
2004 done:
2005 return rc;
2006 }
2007
2008 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2009
2010 int
2011 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2012 {
2013 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2014 struct buf *bp;
2015
2016 req->queue = queue;
2017 bp = req->bp;
2018
2019 switch (req->type) {
2020 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2021 /* XXX need to do something extra here.. */
2022 /* I'm leaving this in, as I've never actually seen it used,
2023 * and I'd like folks to report it... GO */
2024 		printf("WAKEUP CALLED\n");
2025 queue->numOutstanding++;
2026
2027 bp->b_flags = 0;
2028 bp->b_private = req;
2029
2030 KernelWakeupFunc(bp);
2031 break;
2032
2033 case RF_IO_TYPE_READ:
2034 case RF_IO_TYPE_WRITE:
2035 #if RF_ACC_TRACE > 0
2036 if (req->tracerec) {
2037 RF_ETIMER_START(req->tracerec->timer);
2038 }
2039 #endif
2040 InitBP(bp, queue->rf_cinfo->ci_vp,
2041 op, queue->rf_cinfo->ci_dev,
2042 req->sectorOffset, req->numSector,
2043 req->buf, KernelWakeupFunc, (void *) req,
2044 queue->raidPtr->logBytesPerSector, req->b_proc);
2045
2046 if (rf_debugKernelAccess) {
2047 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2048 (long) bp->b_blkno));
2049 }
2050 queue->numOutstanding++;
2051 queue->last_deq_sector = req->sectorOffset;
2052 		/* this access wouldn't have been admitted if there were any
2053 		 * pending requests at any other priority */
2054 queue->curPriority = req->priority;
2055
2056 db1_printf(("Going for %c to unit %d col %d\n",
2057 req->type, queue->raidPtr->raidid,
2058 queue->col));
2059 db1_printf(("sector %d count %d (%d bytes) %d\n",
2060 (int) req->sectorOffset, (int) req->numSector,
2061 (int) (req->numSector <<
2062 queue->raidPtr->logBytesPerSector),
2063 (int) queue->raidPtr->logBytesPerSector));
2064
2065 /*
2066 * XXX: drop lock here since this can block at
2067 * least with backing SCSI devices. Retake it
2068 * to minimize fuss with calling interfaces.
2069 */
2070
2071 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2072 bdev_strategy(bp);
2073 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2074 break;
2075
2076 default:
2077 panic("bad req->type in rf_DispatchKernelIO");
2078 }
2079 db1_printf(("Exiting from DispatchKernelIO\n"));
2080
2081 return (0);
2082 }
2083 /* this is the callback function associated with an I/O invoked from
2084    kernel code.
2085  */
2086 static void
2087 KernelWakeupFunc(struct buf *bp)
2088 {
2089 RF_DiskQueueData_t *req = NULL;
2090 RF_DiskQueue_t *queue;
2091
2092 db1_printf(("recovering the request queue:\n"));
2093
2094 req = bp->b_private;
2095
2096 queue = (RF_DiskQueue_t *) req->queue;
2097
2098 rf_lock_mutex2(queue->raidPtr->iodone_lock);
2099
2100 #if RF_ACC_TRACE > 0
2101 if (req->tracerec) {
2102 RF_ETIMER_STOP(req->tracerec->timer);
2103 RF_ETIMER_EVAL(req->tracerec->timer);
2104 rf_lock_mutex2(rf_tracing_mutex);
2105 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2106 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2107 req->tracerec->num_phys_ios++;
2108 rf_unlock_mutex2(rf_tracing_mutex);
2109 }
2110 #endif
2111
2112 /* XXX Ok, let's get aggressive... If b_error is set, let's go
2113 * ballistic, and mark the component as hosed... */
2114
2115 if (bp->b_error != 0) {
2116 /* Mark the disk as dead */
2117 /* but only mark it once... */
2118 /* and only if it wouldn't leave this RAID set
2119 completely broken */
2120 if (((queue->raidPtr->Disks[queue->col].status ==
2121 rf_ds_optimal) ||
2122 (queue->raidPtr->Disks[queue->col].status ==
2123 rf_ds_used_spare)) &&
2124 (queue->raidPtr->numFailures <
2125 queue->raidPtr->Layout.map->faultsTolerated)) {
2126 printf("raid%d: IO Error (%d). Marking %s as failed.\n",
2127 queue->raidPtr->raidid,
2128 bp->b_error,
2129 queue->raidPtr->Disks[queue->col].devname);
2130 queue->raidPtr->Disks[queue->col].status =
2131 rf_ds_failed;
2132 queue->raidPtr->status = rf_rs_degraded;
2133 queue->raidPtr->numFailures++;
2134 queue->raidPtr->numNewFailures++;
2135 } else { /* Disk is already dead... */
2136 /* printf("Disk already marked as dead!\n"); */
2137 }
2138
2139 }
2140
2141 /* Fill in the error value */
2142 req->error = bp->b_error;
2143
2144 /* Drop this one on the "finished" queue... */
2145 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2146
2147 /* Let the raidio thread know there is work to be done. */
2148 rf_signal_cond2(queue->raidPtr->iodone_cv);
2149
2150 rf_unlock_mutex2(queue->raidPtr->iodone_lock);
2151 }
2152
2153
2154 /*
2155 * initialize a buf structure for doing an I/O in the kernel.
2156 */
2157 static void
2158 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2159 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2160 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2161 struct proc *b_proc)
2162 {
2163 /* bp->b_flags = B_PHYS | rw_flag; */
2164 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2165 bp->b_oflags = 0;
2166 bp->b_cflags = 0;
2167 bp->b_bcount = numSect << logBytesPerSector;
2168 bp->b_bufsize = bp->b_bcount;
2169 bp->b_error = 0;
2170 bp->b_dev = dev;
2171 bp->b_data = bf;
2172 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2173 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2174 if (bp->b_bcount == 0) {
2175 panic("bp->b_bcount is zero in InitBP!!");
2176 }
2177 bp->b_proc = b_proc;
2178 bp->b_iodone = cbFunc;
2179 bp->b_private = cbArg;
2180 }
2181
2182 /*
2183 * Wait interruptibly for an exclusive lock.
2184 *
2185 * XXX
2186 * Several drivers do this; it should be abstracted and made MP-safe.
2187 * (Hmm... where have we seen this warning before :-> GO )
2188 */
2189 static int
2190 raidlock(struct raid_softc *rs)
2191 {
2192 int error;
2193
2194 error = 0;
2195 mutex_enter(&rs->sc_mutex);
2196 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2197 rs->sc_flags |= RAIDF_WANTED;
2198 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2199 if (error != 0)
2200 goto done;
2201 }
2202 rs->sc_flags |= RAIDF_LOCKED;
2203 done:
2204 mutex_exit(&rs->sc_mutex);
2205 return (error);
2206 }
2207 /*
2208 * Unlock and wake up any waiters.
2209 */
2210 static void
2211 raidunlock(struct raid_softc *rs)
2212 {
2213
2214 mutex_enter(&rs->sc_mutex);
2215 rs->sc_flags &= ~RAIDF_LOCKED;
2216 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2217 rs->sc_flags &= ~RAIDF_WANTED;
2218 cv_broadcast(&rs->sc_cv);
2219 }
2220 mutex_exit(&rs->sc_mutex);
2221 }
2222
2223
2224 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2225 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2226 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2227
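/*
 * The helpers below define the on-disk metadata layout of a component:
 * the component label lives RF_COMPONENT_INFO_OFFSET bytes in, occupying
 * at least RF_COMPONENT_INFO_SIZE bytes (rounded up to a whole sector),
 * with the parity map immediately after that area.  An illustrative
 * worked example, assuming 512-byte sectors:
 *
 *	rf_component_info_offset()	= 16384 bytes
 *	rf_component_info_size(512)	= max(512, 1024) = 1024 bytes
 *	rf_parity_map_offset()		= 16384 + 1024   = 17408 bytes
 *
 * Larger sectors (e.g. 4096 bytes) push each area out to a full sector
 * via the max() computations below.
 */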
2228 static daddr_t
2229 rf_component_info_offset(void)
2230 {
2231
2232 return RF_COMPONENT_INFO_OFFSET;
2233 }
2234
2235 static daddr_t
2236 rf_component_info_size(unsigned secsize)
2237 {
2238 daddr_t info_size;
2239
2240 KASSERT(secsize);
2241 if (secsize > RF_COMPONENT_INFO_SIZE)
2242 info_size = secsize;
2243 else
2244 info_size = RF_COMPONENT_INFO_SIZE;
2245
2246 return info_size;
2247 }
2248
2249 static daddr_t
2250 rf_parity_map_offset(RF_Raid_t *raidPtr)
2251 {
2252 daddr_t map_offset;
2253
2254 KASSERT(raidPtr->bytesPerSector);
2255 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2256 map_offset = raidPtr->bytesPerSector;
2257 else
2258 map_offset = RF_COMPONENT_INFO_SIZE;
2259 map_offset += rf_component_info_offset();
2260
2261 return map_offset;
2262 }
2263
2264 static daddr_t
2265 rf_parity_map_size(RF_Raid_t *raidPtr)
2266 {
2267 daddr_t map_size;
2268
2269 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2270 map_size = raidPtr->bytesPerSector;
2271 else
2272 map_size = RF_PARITY_MAP_SIZE;
2273
2274 return map_size;
2275 }
2276
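/*
 * raidmarkclean/raidmarkdirty -- update the clean flag in the cached
 * component label for column `col' and flush the label out to disk.
 */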
2277 int
2278 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2279 {
2280 RF_ComponentLabel_t *clabel;
2281
2282 clabel = raidget_component_label(raidPtr, col);
2283 clabel->clean = RF_RAID_CLEAN;
2284 raidflush_component_label(raidPtr, col);
2285 return(0);
2286 }
2287
2288
2289 int
2290 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2291 {
2292 RF_ComponentLabel_t *clabel;
2293
2294 clabel = raidget_component_label(raidPtr, col);
2295 clabel->clean = RF_RAID_DIRTY;
2296 raidflush_component_label(raidPtr, col);
2297 return(0);
2298 }
2299
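/*
 * Component label accessors: raidfetch_component_label() reads the
 * on-disk label into the in-core copy, raidget_component_label()
 * returns a pointer to that in-core copy, and raidflush_component_label()
 * writes the (updated) in-core copy back to disk.
 */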
2300 int
2301 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2302 {
2303 KASSERT(raidPtr->bytesPerSector);
2304 return raidread_component_label(raidPtr->bytesPerSector,
2305 raidPtr->Disks[col].dev,
2306 raidPtr->raid_cinfo[col].ci_vp,
2307 &raidPtr->raid_cinfo[col].ci_label);
2308 }
2309
2310 RF_ComponentLabel_t *
2311 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2312 {
2313 return &raidPtr->raid_cinfo[col].ci_label;
2314 }
2315
2316 int
2317 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2318 {
2319 RF_ComponentLabel_t *label;
2320
2321 label = &raidPtr->raid_cinfo[col].ci_label;
2322 label->mod_counter = raidPtr->mod_counter;
2323 #ifndef RF_NO_PARITY_MAP
2324 label->parity_map_modcount = label->mod_counter;
2325 #endif
2326 return raidwrite_component_label(raidPtr->bytesPerSector,
2327 raidPtr->Disks[col].dev,
2328 raidPtr->raid_cinfo[col].ci_vp, label);
2329 }
2330
2331
2332 static int
2333 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2334 RF_ComponentLabel_t *clabel)
2335 {
2336 return raidread_component_area(dev, b_vp, clabel,
2337 sizeof(RF_ComponentLabel_t),
2338 rf_component_info_offset(),
2339 rf_component_info_size(secsize));
2340 }
2341
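/*
 * Read `msize' bytes of metadata into `data' from the component area
 * that starts `offset' bytes into the device and spans `dsize' bytes
 * on disk.  Used for both component labels and parity maps.
 */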
2342 /* ARGSUSED */
2343 static int
2344 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2345 size_t msize, daddr_t offset, daddr_t dsize)
2346 {
2347 struct buf *bp;
2348 int error;
2349
2350 /* XXX should probably ensure that we don't try to do this if
2351 someone has changed rf_protected_sectors. */
2352
2353 if (b_vp == NULL) {
2354 /* For whatever reason, this component is not valid.
2355 Don't try to read a component label from it. */
2356 return(EINVAL);
2357 }
2358
2359 /* get a block of the appropriate size... */
2360 bp = geteblk((int)dsize);
2361 bp->b_dev = dev;
2362
2363 /* get our ducks in a row for the read */
2364 bp->b_blkno = offset / DEV_BSIZE;
2365 bp->b_bcount = dsize;
2366 bp->b_flags |= B_READ;
2367 bp->b_resid = dsize;
2368
2369 bdev_strategy(bp);
2370 error = biowait(bp);
2371
2372 if (!error) {
2373 memcpy(data, bp->b_data, msize);
2374 }
2375
2376 brelse(bp, 0);
2377 return(error);
2378 }
2379
2380
2381 static int
2382 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2383 RF_ComponentLabel_t *clabel)
2384 {
2385 return raidwrite_component_area(dev, b_vp, clabel,
2386 sizeof(RF_ComponentLabel_t),
2387 rf_component_info_offset(),
2388 rf_component_info_size(secsize), 0);
2389 }
2390
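/*
 * Write `msize' bytes of metadata from `data' into the `dsize'-byte
 * component area at `offset'; the rest of the area is zero-filled.
 * If `asyncp' is set, the write is issued B_ASYNC and we return
 * without waiting for completion.
 */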
2391 /* ARGSUSED */
2392 static int
2393 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2394 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2395 {
2396 struct buf *bp;
2397 int error;
2398
2399 /* get a block of the appropriate size... */
2400 bp = geteblk((int)dsize);
2401 bp->b_dev = dev;
2402
2403 /* get our ducks in a row for the write */
2404 bp->b_blkno = offset / DEV_BSIZE;
2405 bp->b_bcount = dsize;
2406 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2407 bp->b_resid = dsize;
2408
2409 memset(bp->b_data, 0, dsize);
2410 memcpy(bp->b_data, data, msize);
2411
2412 bdev_strategy(bp);
2413 if (asyncp)
2414 return 0;
2415 error = biowait(bp);
2416 brelse(bp, 0);
2417 if (error) {
2418 #if 1
2419 printf("Failed to write RAID component info!\n");
2420 #endif
2421 }
2422
2423 return(error);
2424 }
2425
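/*
 * rf_paritymap_kern_write()/rf_paritymap_kern_read() mirror the parity
 * map across every live component: writes go to all of them, and reads
 * combine the per-component copies via rf_paritymap_merge() so that
 * dirty regions recorded on any component are preserved.
 */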
2426 void
2427 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2428 {
2429 int c;
2430
2431 for (c = 0; c < raidPtr->numCol; c++) {
2432 /* Skip dead disks. */
2433 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2434 continue;
2435 /* XXXjld: what if an error occurs here? */
2436 raidwrite_component_area(raidPtr->Disks[c].dev,
2437 raidPtr->raid_cinfo[c].ci_vp, map,
2438 RF_PARITYMAP_NBYTE,
2439 rf_parity_map_offset(raidPtr),
2440 rf_parity_map_size(raidPtr), 0);
2441 }
2442 }
2443
2444 void
2445 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2446 {
2447 struct rf_paritymap_ondisk tmp;
2448 int c,first;
2449
2450 first=1;
2451 for (c = 0; c < raidPtr->numCol; c++) {
2452 /* Skip dead disks. */
2453 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2454 continue;
2455 raidread_component_area(raidPtr->Disks[c].dev,
2456 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2457 RF_PARITYMAP_NBYTE,
2458 rf_parity_map_offset(raidPtr),
2459 rf_parity_map_size(raidPtr));
2460 if (first) {
2461 memcpy(map, &tmp, sizeof(*map));
2462 first = 0;
2463 } else {
2464 rf_paritymap_merge(map, &tmp);
2465 }
2466 }
2467 }
2468
2469 void
2470 rf_markalldirty(RF_Raid_t *raidPtr)
2471 {
2472 RF_ComponentLabel_t *clabel;
2473 int sparecol;
2474 int c;
2475 int j;
2476 int scol = -1;
2477
2478 raidPtr->mod_counter++;
2479 for (c = 0; c < raidPtr->numCol; c++) {
2480 /* we don't want to touch (at all) a disk that has
2481 failed */
2482 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2483 clabel = raidget_component_label(raidPtr, c);
2484 if (clabel->status == rf_ds_spared) {
2485 /* XXX do something special...
2486 but whatever you do, don't
2487 try to access it!! */
2488 } else {
2489 raidmarkdirty(raidPtr, c);
2490 }
2491 }
2492 }
2493
2494 for( c = 0; c < raidPtr->numSpare ; c++) {
2495 sparecol = raidPtr->numCol + c;
2496 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2497 			/*
2498 			 * We claim this disk is "optimal" if it's
2499 			 * rf_ds_used_spare, as that means it should be
2500 			 * directly substitutable for the disk it replaced.
2501 			 * We note that too...
2502 			 */
2505
2506 for(j=0;j<raidPtr->numCol;j++) {
2507 if (raidPtr->Disks[j].spareCol == sparecol) {
2508 scol = j;
2509 break;
2510 }
2511 }
2512
2513 clabel = raidget_component_label(raidPtr, sparecol);
2514 /* make sure status is noted */
2515
2516 raid_init_component_label(raidPtr, clabel);
2517
2518 clabel->row = 0;
2519 clabel->column = scol;
2520 /* Note: we *don't* change status from rf_ds_used_spare
2521 to rf_ds_optimal */
2522 /* clabel.status = rf_ds_optimal; */
2523
2524 raidmarkdirty(raidPtr, sparecol);
2525 }
2526 }
2527 }
2528
2529
2530 void
2531 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2532 {
2533 RF_ComponentLabel_t *clabel;
2534 int sparecol;
2535 int c;
2536 int j;
2537 int scol;
2538 struct raid_softc *rs = raidPtr->softc;
2539
2540 scol = -1;
2541
2542 /* XXX should do extra checks to make sure things really are clean,
2543 rather than blindly setting the clean bit... */
2544
2545 raidPtr->mod_counter++;
2546
2547 for (c = 0; c < raidPtr->numCol; c++) {
2548 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2549 clabel = raidget_component_label(raidPtr, c);
2550 /* make sure status is noted */
2551 clabel->status = rf_ds_optimal;
2552
2553 /* note what unit we are configured as */
2554 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2555 clabel->last_unit = raidPtr->raidid;
2556
2557 raidflush_component_label(raidPtr, c);
2558 if (final == RF_FINAL_COMPONENT_UPDATE) {
2559 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2560 raidmarkclean(raidPtr, c);
2561 }
2562 }
2563 }
2564 /* else we don't touch it.. */
2565 }
2566
2567 for( c = 0; c < raidPtr->numSpare ; c++) {
2568 sparecol = raidPtr->numCol + c;
2569 /* Need to ensure that the reconstruct actually completed! */
2570 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2571 			/*
2572 			 * We claim this disk is "optimal" if it's
2573 			 * rf_ds_used_spare, as that means it should be
2574 			 * directly substitutable for the disk it replaced.
2575 			 * We note that too...
2576 			 */
2579
2580 for(j=0;j<raidPtr->numCol;j++) {
2581 if (raidPtr->Disks[j].spareCol == sparecol) {
2582 scol = j;
2583 break;
2584 }
2585 }
2586
2587 /* XXX shouldn't *really* need this... */
2588 clabel = raidget_component_label(raidPtr, sparecol);
2589 /* make sure status is noted */
2590
2591 raid_init_component_label(raidPtr, clabel);
2592
2593 clabel->column = scol;
2594 clabel->status = rf_ds_optimal;
2595 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
2596 clabel->last_unit = raidPtr->raidid;
2597
2598 raidflush_component_label(raidPtr, sparecol);
2599 if (final == RF_FINAL_COMPONENT_UPDATE) {
2600 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2601 raidmarkclean(raidPtr, sparecol);
2602 }
2603 }
2604 }
2605 }
2606 }
2607
2608 void
2609 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2610 {
2611
2612 if (vp != NULL) {
2613 if (auto_configured == 1) {
2614 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2615 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2616 vput(vp);
2617
2618 } else {
2619 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2620 }
2621 }
2622 }
2623
2624
2625 void
2626 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2627 {
2628 int r,c;
2629 struct vnode *vp;
2630 int acd;
2631
2632
2633 /* We take this opportunity to close the vnodes like we should.. */
2634
2635 for (c = 0; c < raidPtr->numCol; c++) {
2636 vp = raidPtr->raid_cinfo[c].ci_vp;
2637 acd = raidPtr->Disks[c].auto_configured;
2638 rf_close_component(raidPtr, vp, acd);
2639 raidPtr->raid_cinfo[c].ci_vp = NULL;
2640 raidPtr->Disks[c].auto_configured = 0;
2641 }
2642
2643 for (r = 0; r < raidPtr->numSpare; r++) {
2644 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2645 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2646 rf_close_component(raidPtr, vp, acd);
2647 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2648 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2649 }
2650 }
2651
2652
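/*
 * The *Thread() functions below are kthread entry points for the
 * long-running operations (reconstruction, parity rewrite, copyback).
 * Each one sets its "in progress" flag, does the work, clears the flag,
 * and terminates via kthread_exit(); none of them return.
 */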
2653 void
2654 rf_ReconThread(struct rf_recon_req *req)
2655 {
2656 int s;
2657 RF_Raid_t *raidPtr;
2658
2659 s = splbio();
2660 raidPtr = (RF_Raid_t *) req->raidPtr;
2661 raidPtr->recon_in_progress = 1;
2662
2663 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2664 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2665
2666 RF_Free(req, sizeof(*req));
2667
2668 raidPtr->recon_in_progress = 0;
2669 splx(s);
2670
2671 /* That's all... */
2672 kthread_exit(0); /* does not return */
2673 }
2674
2675 void
2676 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2677 {
2678 int retcode;
2679 int s;
2680
2681 raidPtr->parity_rewrite_stripes_done = 0;
2682 raidPtr->parity_rewrite_in_progress = 1;
2683 s = splbio();
2684 retcode = rf_RewriteParity(raidPtr);
2685 splx(s);
2686 if (retcode) {
2687 printf("raid%d: Error re-writing parity (%d)!\n",
2688 raidPtr->raidid, retcode);
2689 } else {
2690 /* set the clean bit! If we shutdown correctly,
2691 the clean bit on each component label will get
2692 set */
2693 raidPtr->parity_good = RF_RAID_CLEAN;
2694 }
2695 raidPtr->parity_rewrite_in_progress = 0;
2696
2697 /* Anyone waiting for us to stop? If so, inform them... */
2698 if (raidPtr->waitShutdown) {
2699 wakeup(&raidPtr->parity_rewrite_in_progress);
2700 }
2701
2702 /* That's all... */
2703 kthread_exit(0); /* does not return */
2704 }
2705
2706
2707 void
2708 rf_CopybackThread(RF_Raid_t *raidPtr)
2709 {
2710 int s;
2711
2712 raidPtr->copyback_in_progress = 1;
2713 s = splbio();
2714 rf_CopybackReconstructedData(raidPtr);
2715 splx(s);
2716 raidPtr->copyback_in_progress = 0;
2717
2718 /* That's all... */
2719 kthread_exit(0); /* does not return */
2720 }
2721
2722
2723 void
2724 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2725 {
2726 int s;
2727 RF_Raid_t *raidPtr;
2728
2729 s = splbio();
2730 raidPtr = req->raidPtr;
2731 raidPtr->recon_in_progress = 1;
2732 rf_ReconstructInPlace(raidPtr, req->col);
2733 RF_Free(req, sizeof(*req));
2734 raidPtr->recon_in_progress = 0;
2735 splx(s);
2736
2737 /* That's all... */
2738 kthread_exit(0); /* does not return */
2739 }
2740
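/*
 * Try to read a component label from the device backed by `vp'.  If the
 * label looks reasonable, prepend the component to `ac_list'; otherwise
 * close and release the vnode.  Returns the (possibly updated) list
 * head, or NULL if we ran out of memory.
 */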
2741 static RF_AutoConfig_t *
2742 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2743 const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2744 unsigned secsize)
2745 {
2746 int good_one = 0;
2747 RF_ComponentLabel_t *clabel;
2748 RF_AutoConfig_t *ac;
2749
2750 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2751 if (clabel == NULL) {
2752 oomem:
2753 while(ac_list) {
2754 ac = ac_list;
2755 if (ac->clabel)
2756 free(ac->clabel, M_RAIDFRAME);
2757 ac_list = ac_list->next;
2758 free(ac, M_RAIDFRAME);
2759 }
2760 printf("RAID auto config: out of memory!\n");
2761 return NULL; /* XXX probably should panic? */
2762 }
2763
2764 if (!raidread_component_label(secsize, dev, vp, clabel)) {
2765 /* Got the label. Does it look reasonable? */
2766 if (rf_reasonable_label(clabel, numsecs) &&
2767 (rf_component_label_partitionsize(clabel) <= size)) {
2768 #ifdef DEBUG
2769 printf("Component on: %s: %llu\n",
2770 cname, (unsigned long long)size);
2771 rf_print_component_label(clabel);
2772 #endif
2773 /* if it's reasonable, add it, else ignore it. */
2774 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2775 M_NOWAIT);
2776 if (ac == NULL) {
2777 free(clabel, M_RAIDFRAME);
2778 goto oomem;
2779 }
2780 strlcpy(ac->devname, cname, sizeof(ac->devname));
2781 ac->dev = dev;
2782 ac->vp = vp;
2783 ac->clabel = clabel;
2784 ac->next = ac_list;
2785 ac_list = ac;
2786 good_one = 1;
2787 }
2788 }
2789 if (!good_one) {
2790 /* cleanup */
2791 free(clabel, M_RAIDFRAME);
2792 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2793 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2794 vput(vp);
2795 }
2796 return ac_list;
2797 }
2798
2799 RF_AutoConfig_t *
2800 rf_find_raid_components(void)
2801 {
2802 struct vnode *vp;
2803 struct disklabel label;
2804 device_t dv;
2805 deviter_t di;
2806 dev_t dev;
2807 int bmajor, bminor, wedge, rf_part_found;
2808 int error;
2809 int i;
2810 RF_AutoConfig_t *ac_list;
2811 uint64_t numsecs;
2812 unsigned secsize;
2813 int dowedges;
2814
2815 /* initialize the AutoConfig list */
2816 ac_list = NULL;
2817
2818 	/*
2819 	 * We begin by trolling through *all* the devices on the system *twice*:
2820 	 * first we scan for wedges, second for other devices.  This avoids
2821 	 * using a raw partition instead of a wedge that covers the whole disk.
2822 	 */
2823
2824 for (dowedges=1; dowedges>=0; --dowedges) {
2825 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2826 dv = deviter_next(&di)) {
2827
2828 /* we are only interested in disks... */
2829 if (device_class(dv) != DV_DISK)
2830 continue;
2831
2832 /* we don't care about floppies... */
2833 if (device_is_a(dv, "fd")) {
2834 continue;
2835 }
2836
2837 			/* we don't care about CDs... */
2838 if (device_is_a(dv, "cd")) {
2839 continue;
2840 }
2841
2842 			/* we don't care about md devices... */
2843 if (device_is_a(dv, "md")) {
2844 continue;
2845 }
2846
2847 /* hdfd is the Atari/Hades floppy driver */
2848 if (device_is_a(dv, "hdfd")) {
2849 continue;
2850 }
2851
2852 /* fdisa is the Atari/Milan floppy driver */
2853 if (device_is_a(dv, "fdisa")) {
2854 continue;
2855 }
2856
2857 /* are we in the wedges pass ? */
2858 wedge = device_is_a(dv, "dk");
2859 if (wedge != dowedges) {
2860 continue;
2861 }
2862
2863 /* need to find the device_name_to_block_device_major stuff */
2864 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
2865
2866 			rf_part_found = 0; /* no RAID partition as yet */
2867
2868 /* get a vnode for the raw partition of this disk */
2869 bminor = minor(device_unit(dv));
2870 dev = wedge ? makedev(bmajor, bminor) :
2871 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2872 if (bdevvp(dev, &vp))
2873 panic("RAID can't alloc vnode");
2874
2875 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
2876
2877 if (error) {
2878 				/* "Who cares."  Continue looking
2879 				   for something that exists */
2880 vput(vp);
2881 continue;
2882 }
2883
2884 error = getdisksize(vp, &numsecs, &secsize);
2885 if (error) {
2886 /*
2887 * Pseudo devices like vnd and cgd can be
2888 * opened but may still need some configuration.
2889 * Ignore these quietly.
2890 */
2891 if (error != ENXIO)
2892 printf("RAIDframe: can't get disk size"
2893 " for dev %s (%d)\n",
2894 device_xname(dv), error);
2895 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2896 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2897 vput(vp);
2898 continue;
2899 }
2900 if (wedge) {
2901 struct dkwedge_info dkw;
2902 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2903 NOCRED);
2904 if (error) {
2905 printf("RAIDframe: can't get wedge info for "
2906 "dev %s (%d)\n", device_xname(dv), error);
2907 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2908 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2909 vput(vp);
2910 continue;
2911 }
2912
2913 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2914 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2915 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2916 vput(vp);
2917 continue;
2918 }
2919
2920 ac_list = rf_get_component(ac_list, dev, vp,
2921 device_xname(dv), dkw.dkw_size, numsecs, secsize);
2922 				rf_part_found = 1; /* there is a RAID component on this disk */
2923 continue;
2924 }
2925
2926 /* Ok, the disk exists. Go get the disklabel. */
2927 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
2928 if (error) {
2929 /*
2930 * XXX can't happen - open() would
2931 * have errored out (or faked up one)
2932 */
2933 if (error != ENOTTY)
2934 printf("RAIDframe: can't get label for dev "
2935 "%s (%d)\n", device_xname(dv), error);
2936 }
2937
2938 /* don't need this any more. We'll allocate it again
2939 a little later if we really do... */
2940 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2941 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2942 vput(vp);
2943
2944 if (error)
2945 continue;
2946
2947 			rf_part_found = 0; /* no RAID partitions yet */
2948 for (i = 0; i < label.d_npartitions; i++) {
2949 char cname[sizeof(ac_list->devname)];
2950
2951 /* We only support partitions marked as RAID */
2952 if (label.d_partitions[i].p_fstype != FS_RAID)
2953 continue;
2954
2955 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2956 if (bdevvp(dev, &vp))
2957 panic("RAID can't alloc vnode");
2958
2959 error = VOP_OPEN(vp, FREAD, NOCRED);
2960 if (error) {
2961 /* Whatever... */
2962 vput(vp);
2963 continue;
2964 }
2965 snprintf(cname, sizeof(cname), "%s%c",
2966 device_xname(dv), 'a' + i);
2967 ac_list = rf_get_component(ac_list, dev, vp, cname,
2968 label.d_partitions[i].p_size, numsecs, secsize);
2969 				rf_part_found = 1; /* at least one RAID partition on this disk */
2970 }
2971
2972 			/*
2973 			 * If there is no RAID component on this disk, either in a
2974 			 * disklabel or inside a wedge, check the raw partition as
2975 			 * well, as it is possible to configure RAID components on
2976 			 * raw disk devices.
2977 			 */
2978
2979 if (!rf_part_found) {
2980 char cname[sizeof(ac_list->devname)];
2981
2982 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2983 if (bdevvp(dev, &vp))
2984 panic("RAID can't alloc vnode");
2985
2986 error = VOP_OPEN(vp, FREAD, NOCRED);
2987 if (error) {
2988 /* Whatever... */
2989 vput(vp);
2990 continue;
2991 }
2992 snprintf(cname, sizeof(cname), "%s%c",
2993 device_xname(dv), 'a' + RAW_PART);
2994 ac_list = rf_get_component(ac_list, dev, vp, cname,
2995 label.d_partitions[RAW_PART].p_size, numsecs, secsize);
2996 }
2997 }
2998 deviter_release(&di);
2999 }
3000 return ac_list;
3001 }
3002
3003
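/*
 * Sanity-check a component label: known version, sensible clean flag,
 * row/column within the declared geometry, and non-zero block size and
 * block count.  Also repairs the stray numBlocksHi/partitionSizeHi
 * garbage described at rf_fix_old_label_size() when `numsecs' is known.
 */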
3004 int
3005 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3006 {
3007
3008 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3009 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3010 ((clabel->clean == RF_RAID_CLEAN) ||
3011 (clabel->clean == RF_RAID_DIRTY)) &&
3012 	    clabel->row >= 0 &&
3013 clabel->column >= 0 &&
3014 clabel->num_rows > 0 &&
3015 clabel->num_columns > 0 &&
3016 clabel->row < clabel->num_rows &&
3017 clabel->column < clabel->num_columns &&
3018 clabel->blockSize > 0 &&
3019 /*
3020 * numBlocksHi may contain garbage, but it is ok since
3021 * the type is unsigned. If it is really garbage,
3022 * rf_fix_old_label_size() will fix it.
3023 */
3024 rf_component_label_numblocks(clabel) > 0) {
3025 /*
3026 * label looks reasonable enough...
3027 * let's make sure it has no old garbage.
3028 */
3029 if (numsecs)
3030 rf_fix_old_label_size(clabel, numsecs);
3031 return(1);
3032 }
3033 return(0);
3034 }
3035
3036
3037 /*
3038 * For reasons yet unknown, some old component labels have garbage in
3039 * the newer numBlocksHi region, and this causes lossage. Since those
3040 * disks will also have numsecs set to less than 32 bits of sectors,
3041 * we can determine when this corruption has occurred, and fix it.
3042 *
3043 * The exact same problem, with the same unknown reason, happens to
3044 * the partitionSizeHi member as well.
3045 */
3046 static void
3047 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3048 {
3049
3050 if (numsecs < ((uint64_t)1 << 32)) {
3051 if (clabel->numBlocksHi) {
3052 printf("WARNING: total sectors < 32 bits, yet "
3053 "numBlocksHi set\n"
3054 "WARNING: resetting numBlocksHi to zero.\n");
3055 clabel->numBlocksHi = 0;
3056 }
3057
3058 if (clabel->partitionSizeHi) {
3059 printf("WARNING: total sectors < 32 bits, yet "
3060 "partitionSizeHi set\n"
3061 "WARNING: resetting partitionSizeHi to zero.\n");
3062 clabel->partitionSizeHi = 0;
3063 }
3064 }
3065 }
3066
3067
3068 #ifdef DEBUG
3069 void
3070 rf_print_component_label(RF_ComponentLabel_t *clabel)
3071 {
3072 uint64_t numBlocks;
3073 static const char *rp[] = {
3074 "No", "Force", "Soft", "*invalid*"
3075 };
3076
3077
3078 numBlocks = rf_component_label_numblocks(clabel);
3079
3080 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
3081 clabel->row, clabel->column,
3082 clabel->num_rows, clabel->num_columns);
3083 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
3084 clabel->version, clabel->serial_number,
3085 clabel->mod_counter);
3086 printf(" Clean: %s Status: %d\n",
3087 clabel->clean ? "Yes" : "No", clabel->status);
3088 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3089 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
3090 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
3091 (char) clabel->parityConfig, clabel->blockSize, numBlocks);
3092 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
3093 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
3094 printf(" Last configured as: raid%d\n", clabel->last_unit);
3095 #if 0
3096 printf(" Config order: %d\n", clabel->config_order);
3097 #endif
3098
3099 }
3100 #endif
3101
3102 RF_ConfigSet_t *
3103 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3104 {
3105 RF_AutoConfig_t *ac;
3106 RF_ConfigSet_t *config_sets;
3107 RF_ConfigSet_t *cset;
3108 RF_AutoConfig_t *ac_next;
3109
3110
3111 config_sets = NULL;
3112
3113 /* Go through the AutoConfig list, and figure out which components
3114 belong to what sets. */
3115 ac = ac_list;
3116 while(ac!=NULL) {
3117 /* we're going to putz with ac->next, so save it here
3118 for use at the end of the loop */
3119 ac_next = ac->next;
3120
3121 if (config_sets == NULL) {
3122 /* will need at least this one... */
3123 config_sets = (RF_ConfigSet_t *)
3124 malloc(sizeof(RF_ConfigSet_t),
3125 M_RAIDFRAME, M_NOWAIT);
3126 if (config_sets == NULL) {
3127 panic("rf_create_auto_sets: No memory!");
3128 }
3129 /* this one is easy :) */
3130 config_sets->ac = ac;
3131 config_sets->next = NULL;
3132 config_sets->rootable = 0;
3133 ac->next = NULL;
3134 } else {
3135 /* which set does this component fit into? */
3136 cset = config_sets;
3137 while(cset!=NULL) {
3138 if (rf_does_it_fit(cset, ac)) {
3139 /* looks like it matches... */
3140 ac->next = cset->ac;
3141 cset->ac = ac;
3142 break;
3143 }
3144 cset = cset->next;
3145 }
3146 if (cset==NULL) {
3147 /* didn't find a match above... new set..*/
3148 cset = (RF_ConfigSet_t *)
3149 malloc(sizeof(RF_ConfigSet_t),
3150 M_RAIDFRAME, M_NOWAIT);
3151 if (cset == NULL) {
3152 panic("rf_create_auto_sets: No memory!");
3153 }
3154 cset->ac = ac;
3155 ac->next = NULL;
3156 cset->next = config_sets;
3157 cset->rootable = 0;
3158 config_sets = cset;
3159 }
3160 }
3161 ac = ac_next;
3162 }
3163
3164
3165 return(config_sets);
3166 }
3167
3168 static int
3169 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3170 {
3171 RF_ComponentLabel_t *clabel1, *clabel2;
3172
3173 /* If this one matches the *first* one in the set, that's good
3174 enough, since the other members of the set would have been
3175 through here too... */
3176 /* note that we are not checking partitionSize here..
3177
3178 Note that we are also not checking the mod_counters here.
3179 If everything else matches except the mod_counter, that's
3180 good enough for this test. We will deal with the mod_counters
3181 a little later in the autoconfiguration process.
3182
3183 (clabel1->mod_counter == clabel2->mod_counter) &&
3184
3185 The reason we don't check for this is that failed disks
3186 will have lower modification counts. If those disks are
3187 not added to the set they used to belong to, then they will
3188 form their own set, which may result in 2 different sets,
3189 for example, competing to be configured at raid0, and
3190 perhaps competing to be the root filesystem set. If the
3191 wrong ones get configured, or both attempt to become /,
3192 	   weird behaviour and/or serious lossage will occur. Thus we
3193 need to bring them into the fold here, and kick them out at
3194 a later point.
3195
3196 */
3197
3198 clabel1 = cset->ac->clabel;
3199 clabel2 = ac->clabel;
3200 if ((clabel1->version == clabel2->version) &&
3201 (clabel1->serial_number == clabel2->serial_number) &&
3202 (clabel1->num_rows == clabel2->num_rows) &&
3203 (clabel1->num_columns == clabel2->num_columns) &&
3204 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3205 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3206 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3207 (clabel1->parityConfig == clabel2->parityConfig) &&
3208 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3209 (clabel1->blockSize == clabel2->blockSize) &&
3210 rf_component_label_numblocks(clabel1) ==
3211 rf_component_label_numblocks(clabel2) &&
3212 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3213 (clabel1->root_partition == clabel2->root_partition) &&
3214 (clabel1->last_unit == clabel2->last_unit) &&
3215 (clabel1->config_order == clabel2->config_order)) {
3216 		/* if it gets here, it almost *has* to be a match */
3217 } else {
3218 /* it's not consistent with somebody in the set..
3219 punt */
3220 return(0);
3221 }
3222 /* all was fine.. it must fit... */
3223 return(1);
3224 }
3225
3226 int
3227 rf_have_enough_components(RF_ConfigSet_t *cset)
3228 {
3229 RF_AutoConfig_t *ac;
3230 RF_AutoConfig_t *auto_config;
3231 RF_ComponentLabel_t *clabel;
3232 int c;
3233 int num_cols;
3234 int num_missing;
3235 int mod_counter;
3236 int mod_counter_found;
3237 int even_pair_failed;
3238 char parity_type;
3239
3240
3241 /* check to see that we have enough 'live' components
3242 of this set. If so, we can configure it if necessary */
3243
3244 num_cols = cset->ac->clabel->num_columns;
3245 parity_type = cset->ac->clabel->parityConfig;
3246
3247 /* XXX Check for duplicate components!?!?!? */
3248
3249 /* Determine what the mod_counter is supposed to be for this set. */
3250
3251 mod_counter_found = 0;
3252 mod_counter = 0;
3253 ac = cset->ac;
3254 while(ac!=NULL) {
3255 if (mod_counter_found==0) {
3256 mod_counter = ac->clabel->mod_counter;
3257 mod_counter_found = 1;
3258 } else {
3259 if (ac->clabel->mod_counter > mod_counter) {
3260 mod_counter = ac->clabel->mod_counter;
3261 }
3262 }
3263 ac = ac->next;
3264 }
3265
3266 num_missing = 0;
3267 auto_config = cset->ac;
3268
3269 even_pair_failed = 0;
3270 for(c=0; c<num_cols; c++) {
3271 ac = auto_config;
3272 while(ac!=NULL) {
3273 if ((ac->clabel->column == c) &&
3274 (ac->clabel->mod_counter == mod_counter)) {
3275 /* it's this one... */
3276 #ifdef DEBUG
3277 printf("Found: %s at %d\n",
3278 ac->devname,c);
3279 #endif
3280 break;
3281 }
3282 ac=ac->next;
3283 }
3284 if (ac==NULL) {
3285 /* Didn't find one here! */
3286 /* special case for RAID 1, especially
3287 where there are more than 2
3288 components (where RAIDframe treats
3289 things a little differently :( ) */
3290 if (parity_type == '1') {
3291 if (c%2 == 0) { /* even component */
3292 even_pair_failed = 1;
3293 } else { /* odd component. If
3294 we're failed, and
3295 so is the even
3296 component, it's
3297 "Good Night, Charlie" */
3298 if (even_pair_failed == 1) {
3299 return(0);
3300 }
3301 }
3302 } else {
3303 /* normal accounting */
3304 num_missing++;
3305 }
3306 }
3307 if ((parity_type == '1') && (c%2 == 1)) {
3308 			/* Just finished the odd component of a pair and
3309 			   didn't bail out.. reset the even_pair_failed
3310 			   flag, and go on to the next component.... */
3311 even_pair_failed = 0;
3312 }
3313 }
3314
3315 clabel = cset->ac->clabel;
3316
3317 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3318 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3319 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3320 /* XXX this needs to be made *much* more general */
3321 /* Too many failures */
3322 return(0);
3323 }
3324 /* otherwise, all is well, and we've got enough to take a kick
3325 at autoconfiguring this set */
3326 return(1);
3327 }
3328
3329 void
3330 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3331 RF_Raid_t *raidPtr)
3332 {
3333 RF_ComponentLabel_t *clabel;
3334 int i;
3335
3336 clabel = ac->clabel;
3337
3338 /* 1. Fill in the common stuff */
3339 config->numRow = clabel->num_rows = 1;
3340 config->numCol = clabel->num_columns;
3341 config->numSpare = 0; /* XXX should this be set here? */
3342 config->sectPerSU = clabel->sectPerSU;
3343 config->SUsPerPU = clabel->SUsPerPU;
3344 config->SUsPerRU = clabel->SUsPerRU;
3345 config->parityConfig = clabel->parityConfig;
3346 /* XXX... */
3347 strcpy(config->diskQueueType,"fifo");
3348 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3349 config->layoutSpecificSize = 0; /* XXX ?? */
3350
3351 while(ac!=NULL) {
3352 		/* row/col values will be in range due to the checks
3353 		   in rf_reasonable_label() */
3354 strcpy(config->devnames[0][ac->clabel->column],
3355 ac->devname);
3356 ac = ac->next;
3357 }
3358
3359 for(i=0;i<RF_MAXDBGV;i++) {
3360 config->debugVars[i][0] = 0;
3361 }
3362 }
3363
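/*
 * rf_set_autoconfig() and rf_set_rootpartition() push a new value of
 * the corresponding flag into the component label of every optimal
 * component and every used spare, flushing each label as they go.
 */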
3364 int
3365 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3366 {
3367 RF_ComponentLabel_t *clabel;
3368 int column;
3369 int sparecol;
3370
3371 raidPtr->autoconfigure = new_value;
3372
3373 for(column=0; column<raidPtr->numCol; column++) {
3374 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3375 clabel = raidget_component_label(raidPtr, column);
3376 clabel->autoconfigure = new_value;
3377 raidflush_component_label(raidPtr, column);
3378 }
3379 }
3380 for(column = 0; column < raidPtr->numSpare ; column++) {
3381 sparecol = raidPtr->numCol + column;
3382 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3383 clabel = raidget_component_label(raidPtr, sparecol);
3384 clabel->autoconfigure = new_value;
3385 raidflush_component_label(raidPtr, sparecol);
3386 }
3387 }
3388 return(new_value);
3389 }
3390
3391 int
3392 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3393 {
3394 RF_ComponentLabel_t *clabel;
3395 int column;
3396 int sparecol;
3397
3398 raidPtr->root_partition = new_value;
3399 for(column=0; column<raidPtr->numCol; column++) {
3400 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3401 clabel = raidget_component_label(raidPtr, column);
3402 clabel->root_partition = new_value;
3403 raidflush_component_label(raidPtr, column);
3404 }
3405 }
3406 for(column = 0; column < raidPtr->numSpare ; column++) {
3407 sparecol = raidPtr->numCol + column;
3408 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3409 clabel = raidget_component_label(raidPtr, sparecol);
3410 clabel->root_partition = new_value;
3411 raidflush_component_label(raidPtr, sparecol);
3412 }
3413 }
3414 return(new_value);
3415 }
3416
3417 void
3418 rf_release_all_vps(RF_ConfigSet_t *cset)
3419 {
3420 RF_AutoConfig_t *ac;
3421
3422 ac = cset->ac;
3423 while(ac!=NULL) {
3424 /* Close the vp, and give it back */
3425 if (ac->vp) {
3426 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3427 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3428 vput(ac->vp);
3429 ac->vp = NULL;
3430 }
3431 ac = ac->next;
3432 }
3433 }
3434
3435
3436 void
3437 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3438 {
3439 RF_AutoConfig_t *ac;
3440 RF_AutoConfig_t *next_ac;
3441
3442 ac = cset->ac;
3443 while(ac!=NULL) {
3444 next_ac = ac->next;
3445 /* nuke the label */
3446 free(ac->clabel, M_RAIDFRAME);
3447 /* cleanup the config structure */
3448 free(ac, M_RAIDFRAME);
3449 /* "next.." */
3450 ac = next_ac;
3451 }
3452 /* and, finally, nuke the config set */
3453 free(cset, M_RAIDFRAME);
3454 }
3455
3456
3457 void
3458 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3459 {
3460 /* current version number */
3461 clabel->version = RF_COMPONENT_LABEL_VERSION;
3462 clabel->serial_number = raidPtr->serial_number;
3463 clabel->mod_counter = raidPtr->mod_counter;
3464
3465 clabel->num_rows = 1;
3466 clabel->num_columns = raidPtr->numCol;
3467 clabel->clean = RF_RAID_DIRTY; /* not clean */
3468 clabel->status = rf_ds_optimal; /* "It's good!" */
3469
3470 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3471 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3472 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3473
3474 clabel->blockSize = raidPtr->bytesPerSector;
3475 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3476
3477 /* XXX not portable */
3478 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3479 clabel->maxOutstanding = raidPtr->maxOutstanding;
3480 clabel->autoconfigure = raidPtr->autoconfigure;
3481 clabel->root_partition = raidPtr->root_partition;
3482 clabel->last_unit = raidPtr->raidid;
3483 clabel->config_order = raidPtr->config_order;
3484
3485 #ifndef RF_NO_PARITY_MAP
3486 rf_paritymap_init_label(raidPtr->parity_map, clabel);
3487 #endif
3488 }
3489
3490 struct raid_softc *
3491 rf_auto_config_set(RF_ConfigSet_t *cset)
3492 {
3493 RF_Raid_t *raidPtr;
3494 RF_Config_t *config;
3495 int raidID;
3496 struct raid_softc *sc;
3497
3498 #ifdef DEBUG
3499 printf("RAID autoconfigure\n");
3500 #endif
3501
3502 /* 1. Create a config structure */
3503 config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
3504 if (config == NULL) {
3505 printf("%s: Out of mem - config!?!?\n", __func__);
3506 /* XXX do something more intelligent here. */
3507 return NULL;
3508 }
3509
3510 /*
3511 2. Figure out what RAID ID this one is supposed to live at
3512 See if we can get the same RAID dev that it was configured
3513 on last time..
3514 */
3515
3516 raidID = cset->ac->clabel->last_unit;
3517 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3518 sc = raidget(++raidID, false))
3519 continue;
3520 #ifdef DEBUG
3521 printf("Configuring raid%d:\n",raidID);
3522 #endif
3523
3524 if (sc == NULL)
3525 sc = raidget(raidID, true);
3526 if (sc == NULL) {
3527 printf("%s: Out of mem - softc!?!?\n", __func__);
3528 /* XXX do something more intelligent here. */
3529 free(config, M_RAIDFRAME);
3530 return NULL;
3531 }
3532
3533 raidPtr = &sc->sc_r;
3534
3535 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3536 raidPtr->softc = sc;
3537 raidPtr->raidid = raidID;
3538 raidPtr->openings = RAIDOUTSTANDING;
3539
3540 /* 3. Build the configuration structure */
3541 rf_create_configuration(cset->ac, config, raidPtr);
3542
3543 /* 4. Do the configuration */
3544 if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3545 raidinit(sc);
3546
3547 rf_markalldirty(raidPtr);
3548 raidPtr->autoconfigure = 1; /* XXX do this here? */
3549 switch (cset->ac->clabel->root_partition) {
3550 case 1: /* Force Root */
3551 case 2: /* Soft Root: root when boot partition part of raid */
3552 /*
3553 * everything configured just fine. Make a note
3554 * that this set is eligible to be root,
3555 * or forced to be root
3556 */
3557 cset->rootable = cset->ac->clabel->root_partition;
3558 /* XXX do this here? */
3559 raidPtr->root_partition = cset->rootable;
3560 break;
3561 default:
3562 break;
3563 }
3564 } else {
3565 raidput(sc);
3566 sc = NULL;
3567 }
3568
3569 /* 5. Cleanup */
3570 free(config, M_RAIDFRAME);
3571 return sc;
3572 }
3573
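/*
 * Convenience wrapper: create a pool for I/O-time allocations (IPL_BIO),
 * pre-allocate `xmin' items and keep at least that many around, and cap
 * the idle free list at `xmax'.
 */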
3574 void
3575 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3576 size_t xmin, size_t xmax)
3577 {
3578 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3579 pool_sethiwat(p, xmax);
3580 pool_prime(p, xmin);
3581 pool_setlowat(p, xmin);
3582 }
3583
3584 /*
3585 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3586 * to see if there is IO pending and if that IO could possibly be done
3587 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
3588 * otherwise.
3589 *
3590 */
3591 int
3592 rf_buf_queue_check(RF_Raid_t *raidPtr)
3593 {
3594 struct raid_softc *rs;
3595 struct dk_softc *dksc;
3596
3597 rs = raidPtr->softc;
3598 dksc = &rs->sc_dksc;
3599
3600 if ((rs->sc_flags & RAIDF_INITED) == 0)
3601 return 1;
3602
3603 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3604 /* there is work to do */
3605 return 0;
3606 }
3607 /* default is nothing to do */
3608 return 1;
3609 }
3610
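/*
 * Fill in RAIDframe's view of a component's size.  The usable block
 * count excludes the first rf_protectedSectors sectors, which are
 * reserved at the front of each component for the metadata areas
 * described above.
 */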
3611 int
3612 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3613 {
3614 uint64_t numsecs;
3615 unsigned secsize;
3616 int error;
3617
3618 error = getdisksize(vp, &numsecs, &secsize);
3619 if (error == 0) {
3620 diskPtr->blockSize = secsize;
3621 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3622 diskPtr->partitionSize = numsecs;
3623 return 0;
3624 }
3625 return error;
3626 }
3627
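/*
 * Autoconfiguration glue for the pseudo-device: the match function
 * always succeeds and attach is a no-op, since the real setup happens
 * in raidinit() once a RAID set is actually configured.
 */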
3628 static int
3629 raid_match(device_t self, cfdata_t cfdata, void *aux)
3630 {
3631 return 1;
3632 }
3633
3634 static void
3635 raid_attach(device_t parent, device_t self, void *aux)
3636 {
3637 }
3638
3639
3640 static int
3641 raid_detach(device_t self, int flags)
3642 {
3643 int error;
3644 struct raid_softc *rs = raidsoftc(self);
3645
3646 if (rs == NULL)
3647 return ENXIO;
3648
3649 if ((error = raidlock(rs)) != 0)
3650 return (error);
3651
3652 error = raid_detach_unlocked(rs);
3653
3654 raidunlock(rs);
3655
3656 /* XXX raid can be referenced here */
3657
3658 if (error)
3659 return error;
3660
3661 /* Free the softc */
3662 raidput(rs);
3663
3664 return 0;
3665 }
3666
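/*
 * Synthesize a disk geometry for the RAID set.  The sectors-per-track
 * and track counts are fabricated (one stripe's worth of data sectors,
 * 4 * numCol tracks) purely to give disklabel something plausible.
 */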
3667 static void
3668 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
3669 {
3670 struct dk_softc *dksc = &rs->sc_dksc;
3671 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
3672
3673 memset(dg, 0, sizeof(*dg));
3674
3675 dg->dg_secperunit = raidPtr->totalSectors;
3676 dg->dg_secsize = raidPtr->bytesPerSector;
3677 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3678 dg->dg_ntracks = 4 * raidPtr->numCol;
3679
3680 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
3681 }
3682
3683 /*
3684 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3685 * We end up returning whatever error was returned by the first cache flush
3686 * that fails.
3687 */
3688
3689 int
3690 rf_sync_component_caches(RF_Raid_t *raidPtr)
3691 {
3692 int c, sparecol;
3693 int e,error;
3694 int force = 1;
3695
3696 error = 0;
3697 for (c = 0; c < raidPtr->numCol; c++) {
3698 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3699 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3700 &force, FWRITE, NOCRED);
3701 if (e) {
3702 if (e != ENODEV)
3703 printf("raid%d: cache flush to component %s failed.\n",
3704 raidPtr->raidid, raidPtr->Disks[c].devname);
3705 if (error == 0) {
3706 error = e;
3707 }
3708 }
3709 }
3710 }
3711
3712 for( c = 0; c < raidPtr->numSpare ; c++) {
3713 sparecol = raidPtr->numCol + c;
3714 /* Need to ensure that the reconstruct actually completed! */
3715 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3716 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3717 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3718 if (e) {
3719 if (e != ENODEV)
3720 printf("raid%d: cache flush to component %s failed.\n",
3721 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3722 if (error == 0) {
3723 error = e;
3724 }
3725 }
3726 }
3727 }
3728 return error;
3729 }
3730
3731 /*
3732 * Module interface
3733 */
3734
3735 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
3736
3737 #ifdef _MODULE
3738 CFDRIVER_DECL(raid, DV_DISK, NULL);
3739 #endif
3740
3741 static int raid_modcmd(modcmd_t, void *);
3742 static int raid_modcmd_init(void);
3743 static int raid_modcmd_fini(void);
3744
3745 static int
3746 raid_modcmd(modcmd_t cmd, void *data)
3747 {
3748 int error;
3749
3750 error = 0;
3751 switch (cmd) {
3752 case MODULE_CMD_INIT:
3753 error = raid_modcmd_init();
3754 break;
3755 case MODULE_CMD_FINI:
3756 error = raid_modcmd_fini();
3757 break;
3758 default:
3759 error = ENOTTY;
3760 break;
3761 }
3762 return error;
3763 }
3764
3765 static int
3766 raid_modcmd_init(void)
3767 {
3768 int error;
3769 int bmajor, cmajor;
3770
3771 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
3772 mutex_enter(&raid_lock);
3773 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3774 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
3775 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
3776 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
3777
3778 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
3779 #endif
3780
3781 bmajor = cmajor = -1;
3782 error = devsw_attach("raid", &raid_bdevsw, &bmajor,
3783 &raid_cdevsw, &cmajor);
3784 if (error != 0 && error != EEXIST) {
3785 aprint_error("%s: devsw_attach failed %d\n", __func__, error);
3786 mutex_exit(&raid_lock);
3787 return error;
3788 }
3789 #ifdef _MODULE
3790 error = config_cfdriver_attach(&raid_cd);
3791 if (error != 0) {
3792 aprint_error("%s: config_cfdriver_attach failed %d\n",
3793 __func__, error);
3794 devsw_detach(&raid_bdevsw, &raid_cdevsw);
3795 mutex_exit(&raid_lock);
3796 return error;
3797 }
3798 #endif
3799 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
3800 if (error != 0) {
3801 aprint_error("%s: config_cfattach_attach failed %d\n",
3802 __func__, error);
3803 #ifdef _MODULE
3804 config_cfdriver_detach(&raid_cd);
3805 #endif
3806 devsw_detach(&raid_bdevsw, &raid_cdevsw);
3807 mutex_exit(&raid_lock);
3808 return error;
3809 }
3810
3811 raidautoconfigdone = false;
3812
3813 mutex_exit(&raid_lock);
3814
3815 if (error == 0) {
3816 if (rf_BootRaidframe(true) == 0)
3817 aprint_verbose("Kernelized RAIDframe activated\n");
3818 else
3819 panic("Serious error activating RAID!!");
3820 }
3821
3822 /*
3823 * Register a finalizer which will be used to auto-config RAID
3824 * sets once all real hardware devices have been found.
3825 */
3826 error = config_finalize_register(NULL, rf_autoconfig);
3827 if (error != 0) {
3828 aprint_error("WARNING: unable to register RAIDframe "
3829 "finalizer\n");
3830 error = 0;
3831 }
3832
3833 return error;
3834 }
3835
3836 static int
3837 raid_modcmd_fini(void)
3838 {
3839 int error;
3840
3841 mutex_enter(&raid_lock);
3842
3843 /* Don't allow unload if raid device(s) exist. */
3844 if (!LIST_EMPTY(&raids)) {
3845 mutex_exit(&raid_lock);
3846 return EBUSY;
3847 }
3848
3849 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
3850 if (error != 0) {
3851 aprint_error("%s: cannot detach cfattach\n",__func__);
3852 mutex_exit(&raid_lock);
3853 return error;
3854 }
3855 #ifdef _MODULE
3856 error = config_cfdriver_detach(&raid_cd);
3857 if (error != 0) {
3858 aprint_error("%s: cannot detach cfdriver\n",__func__);
3859 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
3860 mutex_exit(&raid_lock);
3861 return error;
3862 }
3863 #endif
3864 error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
3865 if (error != 0) {
3866 aprint_error("%s: cannot detach devsw\n",__func__);
3867 #ifdef _MODULE
3868 config_cfdriver_attach(&raid_cd);
3869 #endif
3870 config_cfattach_attach(raid_cd.cd_name, &raid_ca);
3871 mutex_exit(&raid_lock);
3872 return error;
3873 }
3874 rf_BootRaidframe(false);
3875 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
3876 rf_destroy_mutex2(rf_sparet_wait_mutex);
3877 rf_destroy_cond2(rf_sparet_wait_cv);
3878 rf_destroy_cond2(rf_sparet_resp_cv);
3879 #endif
3880 mutex_exit(&raid_lock);
3881 mutex_destroy(&raid_lock);
3882
3883 return error;
3884 }
3885