rf_netbsdkintf.c revision 1.295.6.2.6.1 1 /* $NetBSD: rf_netbsdkintf.c,v 1.295.6.2.6.1 2014/12/02 22:08:01 snj Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.295.6.2.6.1 2014/12/02 22:08:01 snj Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #ifdef DEBUG
156 int rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else /* DEBUG */
159 #define db1_printf(a) { }
160 #endif /* DEBUG */
161
162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 raidopen, raidclose, raidstrategy, raidioctl,
211 raiddump, raidsize, D_DISK
212 };
213
214 const struct cdevsw raid_cdevsw = {
215 raidopen, raidclose, raidread, raidwrite, raidioctl,
216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218
219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit software state; one entry per RAID device.  Allocated as an
 * array of 'numraid' entries in raidattach() and exported (see
 * raid_softc below) for the benefit of KVM groveling tools.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf(9) device handle */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* minor number -> unit number */
#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;
244
245 extern struct cfdriver raid_cd;
246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248 DVF_DETACH_SHUTDOWN);
249
250 /*
251 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252 * Be aware that large numbers can allow the driver to consume a lot of
253 * kernel memory, especially on writes, and in degraded mode reads.
254 *
255 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256 * a single 64K write will typically require 64K for the old data,
257 * 64K for the old parity, and 64K for the new parity, for a total
258 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261 *
262 * Now in degraded mode, for example, a 64K read on the above setup may
263 * require data reconstruction, which will require *all* of the 4 remaining
264 * disks to participate -- 4 * 32K/disk == 128K again.
265 */
266
267 #ifndef RAIDOUTSTANDING
268 #define RAIDOUTSTANDING 6
269 #endif
270
271 #define RAIDLABELDEV(dev) \
272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276
277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278 struct disklabel *);
279 static void raidgetdisklabel(dev_t);
280 static void raidmakedisklabel(struct raid_softc *);
281
282 static int raidlock(struct raid_softc *);
283 static void raidunlock(struct raid_softc *);
284
285 static int raid_detach_unlocked(struct raid_softc *);
286
287 static void rf_markalldirty(RF_Raid_t *);
288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289
290 void rf_ReconThread(struct rf_recon_req *);
291 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292 void rf_CopybackThread(RF_Raid_t *raidPtr);
293 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294 int rf_autoconfig(device_t);
295 void rf_buildroothack(RF_ConfigSet_t *);
296
297 RF_AutoConfig_t *rf_find_raid_components(void);
298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302 int rf_set_autoconfig(RF_Raid_t *, int);
303 int rf_set_rootpartition(RF_Raid_t *, int);
304 void rf_release_all_vps(RF_ConfigSet_t *);
305 void rf_cleanup_config_set(RF_ConfigSet_t *);
306 int rf_have_enough_components(RF_ConfigSet_t *);
307 int rf_auto_config_set(RF_ConfigSet_t *, int *);
308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309
310 /*
311 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
312 * Note that this is overridden by having RAID_AUTOCONFIG as an option
313 * in the kernel config file.
314 */
315 #ifdef RAID_AUTOCONFIG
316 int raidautoconfig = 1;
317 #else
318 int raidautoconfig = 0;
319 #endif
320 static bool raidautoconfigdone = false;
321
322 struct RF_Pools_s rf_pools;
323
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of units requested in the kernel configuration.  Allocates
 * the global raidPtrs[] and raid_softc[] arrays, boots the RAIDframe
 * core, attaches the autoconf glue and registers the autoconfiguration
 * finalizer.  Panics on unrecoverable allocation/boot failures; on a
 * partial per-unit allocation failure it trims 'numraid' and returns.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* synchronization for the spare-table installation protocol */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		/* per-unit buffer queue, sorted by raw block number */
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* trim numraid so later unit-range checks stay safe */
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
403
/*
 * rf_autoconfig: config_finalize(9) callback, run after all real
 * hardware devices have been found.  Locates RAID components, groups
 * them into configuration sets and hands the sets to rf_buildroothack()
 * for evaluation and configuration.  Guarded so it only ever runs once;
 * returns 1 if work was done, 0 if autoconfig is disabled or already
 * done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 *    This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
431
/*
 * rf_buildroothack: walk the list of auto-detected configuration sets,
 * configure each set that has enough components and has autoconfigure
 * enabled, and release the resources of the rest.  Afterwards, try to
 * determine whether one of the configured sets should become the root
 * device: a single rootable set wins outright; with several candidates
 * we check which set contains the device the system booted from, and
 * if that is still ambiguous we set RB_ASKNAME so the user decides.
 * Consumes (frees) the config_sets list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; grab next first */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/*
		 * Re-count, keeping only root-eligible sets that contain
		 * the component the system actually booted from.
		 */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the leading "/dev/" for comparison */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				    raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
526
527
528 int
529 raidsize(dev_t dev)
530 {
531 struct raid_softc *rs;
532 struct disklabel *lp;
533 int part, unit, omask, size;
534
535 unit = raidunit(dev);
536 if (unit >= numraid)
537 return (-1);
538 rs = &raid_softc[unit];
539
540 if ((rs->sc_flags & RAIDF_INITED) == 0)
541 return (-1);
542
543 part = DISKPART(dev);
544 omask = rs->sc_dkdev.dk_openmask & (1 << part);
545 lp = rs->sc_dkdev.dk_label;
546
547 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
548 return (-1);
549
550 if (lp->d_partitions[part].p_fstype != FS_SWAP)
551 size = -1;
552 else
553 size = lp->d_partitions[part].p_size *
554 (lp->d_secsize / DEV_BSIZE);
555
556 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
557 return (-1);
558
559 return (size);
560
561 }
562
/*
 * raiddump: crash-dump entry point.  Only RAID 1 sets are supported.
 * Picks a live component (or a used spare standing in for one) and
 * forwards the dump to that component's block device, offsetting the
 * block number by the partition offset plus RF_PROTECTED_SECTORS so it
 * lands in the right place on the underlying component.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* the dump must be a whole number of DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the RAID device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/*
 * raidopen: open entry point for both the block and character devices.
 * Takes the per-unit lock, refuses opens on a unit being shut down or
 * on non-raw partitions while wedges exist, (re)reads the disklabel on
 * the first open of a configured unit, validates the partition, and
 * records the open in the appropriate openmask.  The first open of a
 * configured unit also marks all components dirty.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* don't allow new opens on a unit on its way out */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of a configured unit: refresh the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}

	/* note: success path falls through to 'bad' to drop the lock */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/*
 * raidclose: close entry point for both the block and character
 * devices.  Clears the partition's bit in the relevant openmask; when
 * the last partition of a configured unit is closed, updates the
 * component labels so the set is marked clean.  Always returns 0 on
 * the close itself (only raidlock() failure is propagated).
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
836
837 void
838 raidstrategy(struct buf *bp)
839 {
840 unsigned int raidID = raidunit(bp->b_dev);
841 RF_Raid_t *raidPtr;
842 struct raid_softc *rs = &raid_softc[raidID];
843 int wlabel;
844
845 if ((rs->sc_flags & RAIDF_INITED) ==0) {
846 bp->b_error = ENXIO;
847 goto done;
848 }
849 if (raidID >= numraid || !raidPtrs[raidID]) {
850 bp->b_error = ENODEV;
851 goto done;
852 }
853 raidPtr = raidPtrs[raidID];
854 if (!raidPtr->valid) {
855 bp->b_error = ENODEV;
856 goto done;
857 }
858 if (bp->b_bcount == 0) {
859 db1_printf(("b_bcount is zero..\n"));
860 goto done;
861 }
862
863 /*
864 * Do bounds checking and adjust transfer. If there's an
865 * error, the bounds check will flag that for us.
866 */
867
868 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
869 if (DISKPART(bp->b_dev) == RAW_PART) {
870 uint64_t size; /* device size in DEV_BSIZE unit */
871
872 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
873 size = raidPtr->totalSectors <<
874 (raidPtr->logBytesPerSector - DEV_BSHIFT);
875 } else {
876 size = raidPtr->totalSectors >>
877 (DEV_BSHIFT - raidPtr->logBytesPerSector);
878 }
879 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
880 goto done;
881 }
882 } else {
883 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
884 db1_printf(("Bounds check failed!!:%d %d\n",
885 (int) bp->b_blkno, (int) wlabel));
886 goto done;
887 }
888 }
889
890 rf_lock_mutex2(raidPtr->iodone_lock);
891
892 bp->b_resid = 0;
893
894 /* stuff it onto our queue */
895 bufq_put(rs->buf_queue, bp);
896
897 /* scheduled the IO to happen at the next convenient time */
898 rf_signal_cond2(raidPtr->iodone_cv);
899 rf_unlock_mutex2(raidPtr->iodone_lock);
900
901 return;
902
903 done:
904 bp->b_resid = bp->b_bcount;
905 biodone(bp);
906 }
907 /* ARGSUSED */
908 int
909 raidread(dev_t dev, struct uio *uio, int flags)
910 {
911 int unit = raidunit(dev);
912 struct raid_softc *rs;
913
914 if (unit >= numraid)
915 return (ENXIO);
916 rs = &raid_softc[unit];
917
918 if ((rs->sc_flags & RAIDF_INITED) == 0)
919 return (ENXIO);
920
921 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
922
923 }
924 /* ARGSUSED */
925 int
926 raidwrite(dev_t dev, struct uio *uio, int flags)
927 {
928 int unit = raidunit(dev);
929 struct raid_softc *rs;
930
931 if (unit >= numraid)
932 return (ENXIO);
933 rs = &raid_softc[unit];
934
935 if ((rs->sc_flags & RAIDF_INITED) == 0)
936 return (ENXIO);
937
938 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
939
940 }
941
/*
 * raid_detach_unlocked: tear down one RAID unit.  Caller must already
 * hold the unit lock (hence "unlocked" -- this routine does no locking
 * itself).  Fails with EBUSY while any partition is open; otherwise
 * shuts down the RAIDframe engine (if the unit was configured) and
 * detaches/destroys the generic disk structures.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}
973
974 int
975 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
976 {
977 int unit = raidunit(dev);
978 int error = 0;
979 int part, pmask, s;
980 cfdata_t cf;
981 struct raid_softc *rs;
982 RF_Config_t *k_cfg, *u_cfg;
983 RF_Raid_t *raidPtr;
984 RF_RaidDisk_t *diskPtr;
985 RF_AccTotals_t *totals;
986 RF_DeviceConfig_t *d_cfg, **ucfgp;
987 u_char *specific_buf;
988 int retcode = 0;
989 int column;
990 /* int raidid; */
991 struct rf_recon_req *rrcopy, *rr;
992 RF_ComponentLabel_t *clabel;
993 RF_ComponentLabel_t *ci_label;
994 RF_ComponentLabel_t **clabel_ptr;
995 RF_SingleComponent_t *sparePtr,*componentPtr;
996 RF_SingleComponent_t component;
997 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
998 int i, j, d;
999 #ifdef __HAVE_OLD_DISKLABEL
1000 struct disklabel newlabel;
1001 #endif
1002 struct dkwedge_info *dkw;
1003
1004 if (unit >= numraid)
1005 return (ENXIO);
1006 rs = &raid_softc[unit];
1007 raidPtr = raidPtrs[unit];
1008
1009 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1010 (int) DISKPART(dev), (int) unit, cmd));
1011
1012 /* Must be open for writes for these commands... */
1013 switch (cmd) {
1014 #ifdef DIOCGSECTORSIZE
1015 case DIOCGSECTORSIZE:
1016 *(u_int *)data = raidPtr->bytesPerSector;
1017 return 0;
1018 case DIOCGMEDIASIZE:
1019 *(off_t *)data =
1020 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1021 return 0;
1022 #endif
1023 case DIOCSDINFO:
1024 case DIOCWDINFO:
1025 #ifdef __HAVE_OLD_DISKLABEL
1026 case ODIOCWDINFO:
1027 case ODIOCSDINFO:
1028 #endif
1029 case DIOCWLABEL:
1030 case DIOCAWEDGE:
1031 case DIOCDWEDGE:
1032 case DIOCSSTRATEGY:
1033 if ((flag & FWRITE) == 0)
1034 return (EBADF);
1035 }
1036
1037 /* Must be initialized for these... */
1038 switch (cmd) {
1039 case DIOCGDINFO:
1040 case DIOCSDINFO:
1041 case DIOCWDINFO:
1042 #ifdef __HAVE_OLD_DISKLABEL
1043 case ODIOCGDINFO:
1044 case ODIOCWDINFO:
1045 case ODIOCSDINFO:
1046 case ODIOCGDEFLABEL:
1047 #endif
1048 case DIOCGPART:
1049 case DIOCWLABEL:
1050 case DIOCGDEFLABEL:
1051 case DIOCAWEDGE:
1052 case DIOCDWEDGE:
1053 case DIOCLWEDGES:
1054 case DIOCCACHESYNC:
1055 case RAIDFRAME_SHUTDOWN:
1056 case RAIDFRAME_REWRITEPARITY:
1057 case RAIDFRAME_GET_INFO:
1058 case RAIDFRAME_RESET_ACCTOTALS:
1059 case RAIDFRAME_GET_ACCTOTALS:
1060 case RAIDFRAME_KEEP_ACCTOTALS:
1061 case RAIDFRAME_GET_SIZE:
1062 case RAIDFRAME_FAIL_DISK:
1063 case RAIDFRAME_COPYBACK:
1064 case RAIDFRAME_CHECK_RECON_STATUS:
1065 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1066 case RAIDFRAME_GET_COMPONENT_LABEL:
1067 case RAIDFRAME_SET_COMPONENT_LABEL:
1068 case RAIDFRAME_ADD_HOT_SPARE:
1069 case RAIDFRAME_REMOVE_HOT_SPARE:
1070 case RAIDFRAME_INIT_LABELS:
1071 case RAIDFRAME_REBUILD_IN_PLACE:
1072 case RAIDFRAME_CHECK_PARITY:
1073 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1074 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1075 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1076 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1077 case RAIDFRAME_SET_AUTOCONFIG:
1078 case RAIDFRAME_SET_ROOT:
1079 case RAIDFRAME_DELETE_COMPONENT:
1080 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1081 case RAIDFRAME_PARITYMAP_STATUS:
1082 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1083 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1084 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1085 case DIOCGSTRATEGY:
1086 case DIOCSSTRATEGY:
1087 if ((rs->sc_flags & RAIDF_INITED) == 0)
1088 return (ENXIO);
1089 }
1090
1091 switch (cmd) {
1092 #ifdef COMPAT_50
1093 case RAIDFRAME_GET_INFO50:
1094 return rf_get_info50(raidPtr, data);
1095
1096 case RAIDFRAME_CONFIGURE50:
1097 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1098 return retcode;
1099 goto config;
1100 #endif
1101 /* configure the system */
1102 case RAIDFRAME_CONFIGURE:
1103
1104 if (raidPtr->valid) {
1105 /* There is a valid RAID set running on this unit! */
1106 printf("raid%d: Device already configured!\n",unit);
1107 return(EINVAL);
1108 }
1109
1110 /* copy-in the configuration information */
1111 /* data points to a pointer to the configuration structure */
1112
1113 u_cfg = *((RF_Config_t **) data);
1114 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1115 if (k_cfg == NULL) {
1116 return (ENOMEM);
1117 }
1118 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1119 if (retcode) {
1120 RF_Free(k_cfg, sizeof(RF_Config_t));
1121 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1122 retcode));
1123 return (retcode);
1124 }
1125 goto config;
1126 config:
1127 /* allocate a buffer for the layout-specific data, and copy it
1128 * in */
1129 if (k_cfg->layoutSpecificSize) {
1130 if (k_cfg->layoutSpecificSize > 10000) {
1131 /* sanity check */
1132 RF_Free(k_cfg, sizeof(RF_Config_t));
1133 return (EINVAL);
1134 }
1135 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1136 (u_char *));
1137 if (specific_buf == NULL) {
1138 RF_Free(k_cfg, sizeof(RF_Config_t));
1139 return (ENOMEM);
1140 }
1141 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142 k_cfg->layoutSpecificSize);
1143 if (retcode) {
1144 RF_Free(k_cfg, sizeof(RF_Config_t));
1145 RF_Free(specific_buf,
1146 k_cfg->layoutSpecificSize);
1147 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1148 retcode));
1149 return (retcode);
1150 }
1151 } else
1152 specific_buf = NULL;
1153 k_cfg->layoutSpecific = specific_buf;
1154
1155 /* should do some kind of sanity check on the configuration.
1156 * Store the sum of all the bytes in the last byte? */
1157
1158 /* configure the system */
1159
1160 /*
1161 * Clear the entire RAID descriptor, just to make sure
1162 * there is no stale data left in the case of a
1163 * reconfiguration
1164 */
1165 memset(raidPtr, 0, sizeof(*raidPtr));
1166 raidPtr->raidid = unit;
1167
1168 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1169
1170 if (retcode == 0) {
1171
1172 /* allow this many simultaneous IO's to
1173 this RAID device */
1174 raidPtr->openings = RAIDOUTSTANDING;
1175
1176 raidinit(raidPtr);
1177 rf_markalldirty(raidPtr);
1178 }
1179 /* free the buffers. No return code here. */
1180 if (k_cfg->layoutSpecificSize) {
1181 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1182 }
1183 RF_Free(k_cfg, sizeof(RF_Config_t));
1184
1185 return (retcode);
1186
1187 /* shutdown the system */
1188 case RAIDFRAME_SHUTDOWN:
1189
1190 part = DISKPART(dev);
1191 pmask = (1 << part);
1192
1193 if ((error = raidlock(rs)) != 0)
1194 return (error);
1195
1196 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1197 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1198 (rs->sc_dkdev.dk_copenmask & pmask)))
1199 retcode = EBUSY;
1200 else {
1201 rs->sc_flags |= RAIDF_SHUTDOWN;
1202 rs->sc_dkdev.dk_copenmask &= ~pmask;
1203 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1204 rs->sc_dkdev.dk_openmask &= ~pmask;
1205 retcode = 0;
1206 }
1207
1208 raidunlock(rs);
1209
1210 if (retcode != 0)
1211 return retcode;
1212
1213 /* free the pseudo device attach bits */
1214
1215 cf = device_cfdata(rs->sc_dev);
1216 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1217 free(cf, M_RAIDFRAME);
1218
1219 return (retcode);
1220 case RAIDFRAME_GET_COMPONENT_LABEL:
1221 clabel_ptr = (RF_ComponentLabel_t **) data;
1222 /* need to read the component label for the disk indicated
1223 by row,column in clabel */
1224
1225 /*
1226 * Perhaps there should be an option to skip the in-core
1227 * copy and hit the disk, as with disklabel(8).
1228 */
1229 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1230
1231 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1232
1233 if (retcode) {
1234 RF_Free(clabel, sizeof(*clabel));
1235 return retcode;
1236 }
1237
1238 clabel->row = 0; /* Don't allow looking at anything else.*/
1239
1240 column = clabel->column;
1241
1242 if ((column < 0) || (column >= raidPtr->numCol +
1243 raidPtr->numSpare)) {
1244 RF_Free(clabel, sizeof(*clabel));
1245 return EINVAL;
1246 }
1247
1248 RF_Free(clabel, sizeof(*clabel));
1249
1250 clabel = raidget_component_label(raidPtr, column);
1251
1252 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1253
1254 #if 0
1255 case RAIDFRAME_SET_COMPONENT_LABEL:
1256 clabel = (RF_ComponentLabel_t *) data;
1257
1258 /* XXX check the label for valid stuff... */
1259 /* Note that some things *should not* get modified --
1260 the user should be re-initing the labels instead of
1261 trying to patch things.
1262 */
1263
1264 raidid = raidPtr->raidid;
1265 #ifdef DEBUG
1266 printf("raid%d: Got component label:\n", raidid);
1267 printf("raid%d: Version: %d\n", raidid, clabel->version);
1268 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1269 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1270 printf("raid%d: Column: %d\n", raidid, clabel->column);
1271 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1272 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1273 printf("raid%d: Status: %d\n", raidid, clabel->status);
1274 #endif
1275 clabel->row = 0;
1276 column = clabel->column;
1277
1278 if ((column < 0) || (column >= raidPtr->numCol)) {
1279 return(EINVAL);
1280 }
1281
1282 /* XXX this isn't allowed to do anything for now :-) */
1283
1284 /* XXX and before it is, we need to fill in the rest
1285 of the fields!?!?!?! */
1286 memcpy(raidget_component_label(raidPtr, column),
1287 clabel, sizeof(*clabel));
1288 raidflush_component_label(raidPtr, column);
1289 return (0);
1290 #endif
1291
1292 case RAIDFRAME_INIT_LABELS:
1293 clabel = (RF_ComponentLabel_t *) data;
1294 /*
1295 we only want the serial number from
1296 the above. We get all the rest of the information
1297 from the config that was used to create this RAID
1298 set.
1299 */
1300
1301 raidPtr->serial_number = clabel->serial_number;
1302
1303 for(column=0;column<raidPtr->numCol;column++) {
1304 diskPtr = &raidPtr->Disks[column];
1305 if (!RF_DEAD_DISK(diskPtr->status)) {
1306 ci_label = raidget_component_label(raidPtr,
1307 column);
1308 /* Zeroing this is important. */
1309 memset(ci_label, 0, sizeof(*ci_label));
1310 raid_init_component_label(raidPtr, ci_label);
1311 ci_label->serial_number =
1312 raidPtr->serial_number;
1313 ci_label->row = 0; /* we dont' pretend to support more */
1314 rf_component_label_set_partitionsize(ci_label,
1315 diskPtr->partitionSize);
1316 ci_label->column = column;
1317 raidflush_component_label(raidPtr, column);
1318 }
1319 /* XXXjld what about the spares? */
1320 }
1321
1322 return (retcode);
1323 case RAIDFRAME_SET_AUTOCONFIG:
1324 d = rf_set_autoconfig(raidPtr, *(int *) data);
1325 printf("raid%d: New autoconfig value is: %d\n",
1326 raidPtr->raidid, d);
1327 *(int *) data = d;
1328 return (retcode);
1329
1330 case RAIDFRAME_SET_ROOT:
1331 d = rf_set_rootpartition(raidPtr, *(int *) data);
1332 printf("raid%d: New rootpartition value is: %d\n",
1333 raidPtr->raidid, d);
1334 *(int *) data = d;
1335 return (retcode);
1336
1337 /* initialize all parity */
1338 case RAIDFRAME_REWRITEPARITY:
1339
1340 if (raidPtr->Layout.map->faultsTolerated == 0) {
1341 /* Parity for RAID 0 is trivially correct */
1342 raidPtr->parity_good = RF_RAID_CLEAN;
1343 return(0);
1344 }
1345
1346 if (raidPtr->parity_rewrite_in_progress == 1) {
1347 /* Re-write is already in progress! */
1348 return(EINVAL);
1349 }
1350
1351 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1352 rf_RewriteParityThread,
1353 raidPtr,"raid_parity");
1354 return (retcode);
1355
1356
1357 case RAIDFRAME_ADD_HOT_SPARE:
1358 sparePtr = (RF_SingleComponent_t *) data;
1359 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1360 retcode = rf_add_hot_spare(raidPtr, &component);
1361 return(retcode);
1362
1363 case RAIDFRAME_REMOVE_HOT_SPARE:
1364 return(retcode);
1365
1366 case RAIDFRAME_DELETE_COMPONENT:
1367 componentPtr = (RF_SingleComponent_t *)data;
1368 memcpy( &component, componentPtr,
1369 sizeof(RF_SingleComponent_t));
1370 retcode = rf_delete_component(raidPtr, &component);
1371 return(retcode);
1372
1373 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1374 componentPtr = (RF_SingleComponent_t *)data;
1375 memcpy( &component, componentPtr,
1376 sizeof(RF_SingleComponent_t));
1377 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1378 return(retcode);
1379
1380 case RAIDFRAME_REBUILD_IN_PLACE:
1381
1382 if (raidPtr->Layout.map->faultsTolerated == 0) {
1383 /* Can't do this on a RAID 0!! */
1384 return(EINVAL);
1385 }
1386
1387 if (raidPtr->recon_in_progress == 1) {
1388 /* a reconstruct is already in progress! */
1389 return(EINVAL);
1390 }
1391
1392 componentPtr = (RF_SingleComponent_t *) data;
1393 memcpy( &component, componentPtr,
1394 sizeof(RF_SingleComponent_t));
1395 component.row = 0; /* we don't support any more */
1396 column = component.column;
1397
1398 if ((column < 0) || (column >= raidPtr->numCol)) {
1399 return(EINVAL);
1400 }
1401
1402 rf_lock_mutex2(raidPtr->mutex);
1403 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1404 (raidPtr->numFailures > 0)) {
1405 /* XXX 0 above shouldn't be constant!!! */
1406 /* some component other than this has failed.
1407 Let's not make things worse than they already
1408 are... */
1409 printf("raid%d: Unable to reconstruct to disk at:\n",
1410 raidPtr->raidid);
1411 printf("raid%d: Col: %d Too many failures.\n",
1412 raidPtr->raidid, column);
1413 rf_unlock_mutex2(raidPtr->mutex);
1414 return (EINVAL);
1415 }
1416 if (raidPtr->Disks[column].status ==
1417 rf_ds_reconstructing) {
1418 printf("raid%d: Unable to reconstruct to disk at:\n",
1419 raidPtr->raidid);
1420 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1421
1422 rf_unlock_mutex2(raidPtr->mutex);
1423 return (EINVAL);
1424 }
1425 if (raidPtr->Disks[column].status == rf_ds_spared) {
1426 rf_unlock_mutex2(raidPtr->mutex);
1427 return (EINVAL);
1428 }
1429 rf_unlock_mutex2(raidPtr->mutex);
1430
1431 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1432 if (rrcopy == NULL)
1433 return(ENOMEM);
1434
1435 rrcopy->raidPtr = (void *) raidPtr;
1436 rrcopy->col = column;
1437
1438 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1439 rf_ReconstructInPlaceThread,
1440 rrcopy,"raid_reconip");
1441 return(retcode);
1442
1443 case RAIDFRAME_GET_INFO:
1444 if (!raidPtr->valid)
1445 return (ENODEV);
1446 ucfgp = (RF_DeviceConfig_t **) data;
1447 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1448 (RF_DeviceConfig_t *));
1449 if (d_cfg == NULL)
1450 return (ENOMEM);
1451 d_cfg->rows = 1; /* there is only 1 row now */
1452 d_cfg->cols = raidPtr->numCol;
1453 d_cfg->ndevs = raidPtr->numCol;
1454 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1455 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1456 return (ENOMEM);
1457 }
1458 d_cfg->nspares = raidPtr->numSpare;
1459 if (d_cfg->nspares >= RF_MAX_DISKS) {
1460 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1461 return (ENOMEM);
1462 }
1463 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1464 d = 0;
1465 for (j = 0; j < d_cfg->cols; j++) {
1466 d_cfg->devs[d] = raidPtr->Disks[j];
1467 d++;
1468 }
1469 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1470 d_cfg->spares[i] = raidPtr->Disks[j];
1471 if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
1472 /* XXX: raidctl(8) expects to see this as a used spare */
1473 d_cfg->spares[i].status = rf_ds_used_spare;
1474 }
1475 }
1476 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1477 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1478
1479 return (retcode);
1480
1481 case RAIDFRAME_CHECK_PARITY:
1482 *(int *) data = raidPtr->parity_good;
1483 return (0);
1484
1485 case RAIDFRAME_PARITYMAP_STATUS:
1486 if (rf_paritymap_ineligible(raidPtr))
1487 return EINVAL;
1488 rf_paritymap_status(raidPtr->parity_map,
1489 (struct rf_pmstat *)data);
1490 return 0;
1491
1492 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1493 if (rf_paritymap_ineligible(raidPtr))
1494 return EINVAL;
1495 if (raidPtr->parity_map == NULL)
1496 return ENOENT; /* ??? */
1497 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1498 (struct rf_pmparams *)data, 1))
1499 return EINVAL;
1500 return 0;
1501
1502 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1503 if (rf_paritymap_ineligible(raidPtr))
1504 return EINVAL;
1505 *(int *) data = rf_paritymap_get_disable(raidPtr);
1506 return 0;
1507
1508 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1509 if (rf_paritymap_ineligible(raidPtr))
1510 return EINVAL;
1511 rf_paritymap_set_disable(raidPtr, *(int *)data);
1512 /* XXX should errors be passed up? */
1513 return 0;
1514
1515 case RAIDFRAME_RESET_ACCTOTALS:
1516 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1517 return (0);
1518
1519 case RAIDFRAME_GET_ACCTOTALS:
1520 totals = (RF_AccTotals_t *) data;
1521 *totals = raidPtr->acc_totals;
1522 return (0);
1523
1524 case RAIDFRAME_KEEP_ACCTOTALS:
1525 raidPtr->keep_acc_totals = *(int *)data;
1526 return (0);
1527
1528 case RAIDFRAME_GET_SIZE:
1529 *(int *) data = raidPtr->totalSectors;
1530 return (0);
1531
1532 /* fail a disk & optionally start reconstruction */
1533 case RAIDFRAME_FAIL_DISK:
1534
1535 if (raidPtr->Layout.map->faultsTolerated == 0) {
1536 /* Can't do this on a RAID 0!! */
1537 return(EINVAL);
1538 }
1539
1540 rr = (struct rf_recon_req *) data;
1541 rr->row = 0;
1542 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1543 return (EINVAL);
1544
1545
1546 rf_lock_mutex2(raidPtr->mutex);
1547 if (raidPtr->status == rf_rs_reconstructing) {
1548 /* you can't fail a disk while we're reconstructing! */
1549 /* XXX wrong for RAID6 */
1550 rf_unlock_mutex2(raidPtr->mutex);
1551 return (EINVAL);
1552 }
1553 if ((raidPtr->Disks[rr->col].status ==
1554 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1555 /* some other component has failed. Let's not make
1556 things worse. XXX wrong for RAID6 */
1557 rf_unlock_mutex2(raidPtr->mutex);
1558 return (EINVAL);
1559 }
1560 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1561 /* Can't fail a spared disk! */
1562 rf_unlock_mutex2(raidPtr->mutex);
1563 return (EINVAL);
1564 }
1565 rf_unlock_mutex2(raidPtr->mutex);
1566
1567 /* make a copy of the recon request so that we don't rely on
1568 * the user's buffer */
1569 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1570 if (rrcopy == NULL)
1571 return(ENOMEM);
1572 memcpy(rrcopy, rr, sizeof(*rr));
1573 rrcopy->raidPtr = (void *) raidPtr;
1574
1575 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1576 rf_ReconThread,
1577 rrcopy,"raid_recon");
1578 return (0);
1579
1580 /* invoke a copyback operation after recon on whatever disk
1581 * needs it, if any */
1582 case RAIDFRAME_COPYBACK:
1583
1584 if (raidPtr->Layout.map->faultsTolerated == 0) {
1585 /* This makes no sense on a RAID 0!! */
1586 return(EINVAL);
1587 }
1588
1589 if (raidPtr->copyback_in_progress == 1) {
1590 /* Copyback is already in progress! */
1591 return(EINVAL);
1592 }
1593
1594 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1595 rf_CopybackThread,
1596 raidPtr,"raid_copyback");
1597 return (retcode);
1598
1599 /* return the percentage completion of reconstruction */
1600 case RAIDFRAME_CHECK_RECON_STATUS:
1601 if (raidPtr->Layout.map->faultsTolerated == 0) {
1602 /* This makes no sense on a RAID 0, so tell the
1603 user it's done. */
1604 *(int *) data = 100;
1605 return(0);
1606 }
1607 if (raidPtr->status != rf_rs_reconstructing)
1608 *(int *) data = 100;
1609 else {
1610 if (raidPtr->reconControl->numRUsTotal > 0) {
1611 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1612 } else {
1613 *(int *) data = 0;
1614 }
1615 }
1616 return (0);
1617 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1618 progressInfoPtr = (RF_ProgressInfo_t **) data;
1619 if (raidPtr->status != rf_rs_reconstructing) {
1620 progressInfo.remaining = 0;
1621 progressInfo.completed = 100;
1622 progressInfo.total = 100;
1623 } else {
1624 progressInfo.total =
1625 raidPtr->reconControl->numRUsTotal;
1626 progressInfo.completed =
1627 raidPtr->reconControl->numRUsComplete;
1628 progressInfo.remaining = progressInfo.total -
1629 progressInfo.completed;
1630 }
1631 retcode = copyout(&progressInfo, *progressInfoPtr,
1632 sizeof(RF_ProgressInfo_t));
1633 return (retcode);
1634
1635 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1636 if (raidPtr->Layout.map->faultsTolerated == 0) {
1637 /* This makes no sense on a RAID 0, so tell the
1638 user it's done. */
1639 *(int *) data = 100;
1640 return(0);
1641 }
1642 if (raidPtr->parity_rewrite_in_progress == 1) {
1643 *(int *) data = 100 *
1644 raidPtr->parity_rewrite_stripes_done /
1645 raidPtr->Layout.numStripe;
1646 } else {
1647 *(int *) data = 100;
1648 }
1649 return (0);
1650
1651 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1652 progressInfoPtr = (RF_ProgressInfo_t **) data;
1653 if (raidPtr->parity_rewrite_in_progress == 1) {
1654 progressInfo.total = raidPtr->Layout.numStripe;
1655 progressInfo.completed =
1656 raidPtr->parity_rewrite_stripes_done;
1657 progressInfo.remaining = progressInfo.total -
1658 progressInfo.completed;
1659 } else {
1660 progressInfo.remaining = 0;
1661 progressInfo.completed = 100;
1662 progressInfo.total = 100;
1663 }
1664 retcode = copyout(&progressInfo, *progressInfoPtr,
1665 sizeof(RF_ProgressInfo_t));
1666 return (retcode);
1667
1668 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1669 if (raidPtr->Layout.map->faultsTolerated == 0) {
1670 /* This makes no sense on a RAID 0 */
1671 *(int *) data = 100;
1672 return(0);
1673 }
1674 if (raidPtr->copyback_in_progress == 1) {
1675 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1676 raidPtr->Layout.numStripe;
1677 } else {
1678 *(int *) data = 100;
1679 }
1680 return (0);
1681
1682 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1683 progressInfoPtr = (RF_ProgressInfo_t **) data;
1684 if (raidPtr->copyback_in_progress == 1) {
1685 progressInfo.total = raidPtr->Layout.numStripe;
1686 progressInfo.completed =
1687 raidPtr->copyback_stripes_done;
1688 progressInfo.remaining = progressInfo.total -
1689 progressInfo.completed;
1690 } else {
1691 progressInfo.remaining = 0;
1692 progressInfo.completed = 100;
1693 progressInfo.total = 100;
1694 }
1695 retcode = copyout(&progressInfo, *progressInfoPtr,
1696 sizeof(RF_ProgressInfo_t));
1697 return (retcode);
1698
1699 /* the sparetable daemon calls this to wait for the kernel to
1700 * need a spare table. this ioctl does not return until a
1701 * spare table is needed. XXX -- calling mpsleep here in the
1702 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1703 * -- I should either compute the spare table in the kernel,
1704 * or have a different -- XXX XXX -- interface (a different
1705 * character device) for delivering the table -- XXX */
1706 #if 0
1707 case RAIDFRAME_SPARET_WAIT:
1708 rf_lock_mutex2(rf_sparet_wait_mutex);
1709 while (!rf_sparet_wait_queue)
1710 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1711 waitreq = rf_sparet_wait_queue;
1712 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1713 rf_unlock_mutex2(rf_sparet_wait_mutex);
1714
1715 /* structure assignment */
1716 *((RF_SparetWait_t *) data) = *waitreq;
1717
1718 RF_Free(waitreq, sizeof(*waitreq));
1719 return (0);
1720
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1723 case RAIDFRAME_ABORT_SPARET_WAIT:
1724 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1725 waitreq->fcol = -1;
1726 rf_lock_mutex2(rf_sparet_wait_mutex);
1727 waitreq->next = rf_sparet_wait_queue;
1728 rf_sparet_wait_queue = waitreq;
1729 rf_broadcast_conf2(rf_sparet_wait_cv);
1730 rf_unlock_mutex2(rf_sparet_wait_mutex);
1731 return (0);
1732
1733 /* used by the spare table daemon to deliver a spare table
1734 * into the kernel */
1735 case RAIDFRAME_SEND_SPARET:
1736
1737 /* install the spare table */
1738 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1739
1740 /* respond to the requestor. the return status of the spare
1741 * table installation is passed in the "fcol" field */
1742 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1743 waitreq->fcol = retcode;
1744 rf_lock_mutex2(rf_sparet_wait_mutex);
1745 waitreq->next = rf_sparet_resp_queue;
1746 rf_sparet_resp_queue = waitreq;
1747 rf_broadcast_cond2(rf_sparet_resp_cv);
1748 rf_unlock_mutex2(rf_sparet_wait_mutex);
1749
1750 return (retcode);
1751 #endif
1752
1753 default:
1754 break; /* fall through to the os-specific code below */
1755
1756 }
1757
1758 if (!raidPtr->valid)
1759 return (EINVAL);
1760
1761 /*
1762 * Add support for "regular" device ioctls here.
1763 */
1764
1765 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1766 if (error != EPASSTHROUGH)
1767 return (error);
1768
1769 switch (cmd) {
1770 case DIOCGDINFO:
1771 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1772 break;
1773 #ifdef __HAVE_OLD_DISKLABEL
1774 case ODIOCGDINFO:
1775 newlabel = *(rs->sc_dkdev.dk_label);
1776 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1777 return ENOTTY;
1778 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1779 break;
1780 #endif
1781
1782 case DIOCGPART:
1783 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1784 ((struct partinfo *) data)->part =
1785 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1786 break;
1787
1788 case DIOCWDINFO:
1789 case DIOCSDINFO:
1790 #ifdef __HAVE_OLD_DISKLABEL
1791 case ODIOCWDINFO:
1792 case ODIOCSDINFO:
1793 #endif
1794 {
1795 struct disklabel *lp;
1796 #ifdef __HAVE_OLD_DISKLABEL
1797 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1798 memset(&newlabel, 0, sizeof newlabel);
1799 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1800 lp = &newlabel;
1801 } else
1802 #endif
1803 lp = (struct disklabel *)data;
1804
1805 if ((error = raidlock(rs)) != 0)
1806 return (error);
1807
1808 rs->sc_flags |= RAIDF_LABELLING;
1809
1810 error = setdisklabel(rs->sc_dkdev.dk_label,
1811 lp, 0, rs->sc_dkdev.dk_cpulabel);
1812 if (error == 0) {
1813 if (cmd == DIOCWDINFO
1814 #ifdef __HAVE_OLD_DISKLABEL
1815 || cmd == ODIOCWDINFO
1816 #endif
1817 )
1818 error = writedisklabel(RAIDLABELDEV(dev),
1819 raidstrategy, rs->sc_dkdev.dk_label,
1820 rs->sc_dkdev.dk_cpulabel);
1821 }
1822 rs->sc_flags &= ~RAIDF_LABELLING;
1823
1824 raidunlock(rs);
1825
1826 if (error)
1827 return (error);
1828 break;
1829 }
1830
1831 case DIOCWLABEL:
1832 if (*(int *) data != 0)
1833 rs->sc_flags |= RAIDF_WLABEL;
1834 else
1835 rs->sc_flags &= ~RAIDF_WLABEL;
1836 break;
1837
1838 case DIOCGDEFLABEL:
1839 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1840 break;
1841
1842 #ifdef __HAVE_OLD_DISKLABEL
1843 case ODIOCGDEFLABEL:
1844 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1845 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1846 return ENOTTY;
1847 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1848 break;
1849 #endif
1850
1851 case DIOCAWEDGE:
1852 case DIOCDWEDGE:
1853 dkw = (void *)data;
1854
1855 /* If the ioctl happens here, the parent is us. */
1856 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1857 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1858
1859 case DIOCLWEDGES:
1860 return dkwedge_list(&rs->sc_dkdev,
1861 (struct dkwedge_list *)data, l);
1862 case DIOCCACHESYNC:
1863 return rf_sync_component_caches(raidPtr);
1864
1865 case DIOCGSTRATEGY:
1866 {
1867 struct disk_strategy *dks = (void *)data;
1868
1869 s = splbio();
1870 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1871 sizeof(dks->dks_name));
1872 splx(s);
1873 dks->dks_paramlen = 0;
1874
1875 return 0;
1876 }
1877
1878 case DIOCSSTRATEGY:
1879 {
1880 struct disk_strategy *dks = (void *)data;
1881 struct bufq_state *new;
1882 struct bufq_state *old;
1883
1884 if (dks->dks_param != NULL) {
1885 return EINVAL;
1886 }
1887 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1888 error = bufq_alloc(&new, dks->dks_name,
1889 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1890 if (error) {
1891 return error;
1892 }
1893 s = splbio();
1894 old = rs->buf_queue;
1895 bufq_move(new, old);
1896 rs->buf_queue = new;
1897 splx(s);
1898 bufq_free(old);
1899
1900 return 0;
1901 }
1902
1903 default:
1904 retcode = ENOTTY;
1905 }
1906 return (retcode);
1907
1908 }
1909
1910
1911 /* raidinit -- complete the rest of the initialization for the
1912 RAIDframe device. */
1913
1914
1915 static void
1916 raidinit(RF_Raid_t *raidPtr)
1917 {
1918 cfdata_t cf;
1919 struct raid_softc *rs;
1920 int unit;
1921
1922 unit = raidPtr->raidid;
1923
1924 rs = &raid_softc[unit];
1925
1926 /* XXX should check return code first... */
1927 rs->sc_flags |= RAIDF_INITED;
1928
1929 /* XXX doesn't check bounds. */
1930 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1931
1932 /* attach the pseudo device */
1933 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1934 cf->cf_name = raid_cd.cd_name;
1935 cf->cf_atname = raid_cd.cd_name;
1936 cf->cf_unit = unit;
1937 cf->cf_fstate = FSTATE_STAR;
1938
1939 rs->sc_dev = config_attach_pseudo(cf);
1940
1941 if (rs->sc_dev == NULL) {
1942 printf("raid%d: config_attach_pseudo failed\n",
1943 raidPtr->raidid);
1944 rs->sc_flags &= ~RAIDF_INITED;
1945 free(cf, M_RAIDFRAME);
1946 return;
1947 }
1948
1949 /* disk_attach actually creates space for the CPU disklabel, among
1950 * other things, so it's critical to call this *BEFORE* we try putzing
1951 * with disklabels. */
1952
1953 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1954 disk_attach(&rs->sc_dkdev);
1955 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1956
1957 /* XXX There may be a weird interaction here between this, and
1958 * protectedSectors, as used in RAIDframe. */
1959
1960 rs->sc_size = raidPtr->totalSectors;
1961
1962 dkwedge_discover(&rs->sc_dkdev);
1963
1964 rf_set_properties(rs, raidPtr);
1965
1966 }
1967 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1968 /* wake up the daemon & tell it to get us a spare table
1969 * XXX
1970 * the entries in the queues should be tagged with the raidPtr
1971 * so that in the extremely rare case that two recons happen at once,
1972 * we know for which device were requesting a spare table
1973 * XXX
1974 *
1975 * XXX This code is not currently used. GO
1976 */
1977 int
1978 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1979 {
1980 int retcode;
1981
1982 rf_lock_mutex2(rf_sparet_wait_mutex);
1983 req->next = rf_sparet_wait_queue;
1984 rf_sparet_wait_queue = req;
1985 rf_broadcast_cond2(rf_sparet_wait_cv);
1986
1987 /* mpsleep unlocks the mutex */
1988 while (!rf_sparet_resp_queue) {
1989 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1990 }
1991 req = rf_sparet_resp_queue;
1992 rf_sparet_resp_queue = req->next;
1993 rf_unlock_mutex2(rf_sparet_wait_mutex);
1994
1995 retcode = req->fcol;
1996 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1997 * alloc'd */
1998 return (retcode);
1999 }
2000 #endif
2001
2002 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2003 * bp & passes it down.
2004 * any calls originating in the kernel must use non-blocking I/O
2005 * do some extra sanity checking to return "appropriate" error values for
2006 * certain conditions (to make some standard utilities work)
2007 *
2008 * Formerly known as: rf_DoAccessKernel
2009 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex while updating labels -- the update
		 * routine takes it itself -- then retake it for the
		 * decrement and the loop below. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* NOTE: raidPtr->mutex is held on entry to each iteration and
	 * released inside; every "continue" path retakes it first. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sector units, then
		 * add the partition offset (RAW_PART has none). */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb accounts for a trailing partial sector, if any. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the set; the "sum <"
		 * comparisons catch unsigned wraparound in the addition. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this I/O; KernelWakeupFunc (or
		 * equivalent completion) gives it back. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		/* If submission failed, fail the buffer here; on success
		 * completion is reported asynchronously. */
		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2130
2131
2132
2133
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry;
   it is dropped and re-taken around the (potentially blocking) strategy
   call below. */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* map the RAIDframe request type onto the buffer-cache I/O flag */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* remember which queue this request was dispatched on; the
	   completion path (KernelWakeupFunc) reads it back via b_private */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately through the normal
		   completion callback; no real I/O is issued */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf for the transfer described by 'req';
		   KernelWakeupFunc becomes b_iodone */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Installed as b_iodone by InitBP(); runs at biodone()
   time.  Records the error (possibly failing the component), queues
   the finished request on raidPtr->iodone, and signals the raidio
   thread.
*/
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the originating request was stashed in b_private at dispatch */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2277
2278
2279 /*
2280 * initialize a buf structure for doing an I/O in the kernel.
2281 */
2282 static void
2283 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2284 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2285 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2286 struct proc *b_proc)
2287 {
2288 /* bp->b_flags = B_PHYS | rw_flag; */
2289 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2290 bp->b_oflags = 0;
2291 bp->b_cflags = 0;
2292 bp->b_bcount = numSect << logBytesPerSector;
2293 bp->b_bufsize = bp->b_bcount;
2294 bp->b_error = 0;
2295 bp->b_dev = dev;
2296 bp->b_data = bf;
2297 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2298 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2299 if (bp->b_bcount == 0) {
2300 panic("bp->b_bcount is zero in InitBP!!");
2301 }
2302 bp->b_proc = b_proc;
2303 bp->b_iodone = cbFunc;
2304 bp->b_private = cbArg;
2305 }
2306
2307 static void
2308 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2309 struct disklabel *lp)
2310 {
2311 memset(lp, 0, sizeof(*lp));
2312
2313 /* fabricate a label... */
2314 lp->d_secperunit = raidPtr->totalSectors;
2315 lp->d_secsize = raidPtr->bytesPerSector;
2316 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2317 lp->d_ntracks = 4 * raidPtr->numCol;
2318 lp->d_ncylinders = raidPtr->totalSectors /
2319 (lp->d_nsectors * lp->d_ntracks);
2320 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2321
2322 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2323 lp->d_type = DTYPE_RAID;
2324 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2325 lp->d_rpm = 3600;
2326 lp->d_interleave = 1;
2327 lp->d_flags = 0;
2328
2329 lp->d_partitions[RAW_PART].p_offset = 0;
2330 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2331 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2332 lp->d_npartitions = RAW_PART + 1;
2333
2334 lp->d_magic = DISKMAGIC;
2335 lp->d_magic2 = DISKMAGIC;
2336 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2337
2338 }
2339 /*
2340 * Read the disklabel from the raid device. If one is not present, fake one
2341 * up.
2342 */
2343 static void
2344 raidgetdisklabel(dev_t dev)
2345 {
2346 int unit = raidunit(dev);
2347 struct raid_softc *rs = &raid_softc[unit];
2348 const char *errstring;
2349 struct disklabel *lp = rs->sc_dkdev.dk_label;
2350 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2351 RF_Raid_t *raidPtr;
2352
2353 db1_printf(("Getting the disklabel...\n"));
2354
2355 memset(clp, 0, sizeof(*clp));
2356
2357 raidPtr = raidPtrs[unit];
2358
2359 raidgetdefaultlabel(raidPtr, rs, lp);
2360
2361 /*
2362 * Call the generic disklabel extraction routine.
2363 */
2364 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2365 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2366 if (errstring)
2367 raidmakedisklabel(rs);
2368 else {
2369 int i;
2370 struct partition *pp;
2371
2372 /*
2373 * Sanity check whether the found disklabel is valid.
2374 *
2375 * This is necessary since total size of the raid device
2376 * may vary when an interleave is changed even though exactly
2377 * same components are used, and old disklabel may used
2378 * if that is found.
2379 */
2380 if (lp->d_secperunit != rs->sc_size)
2381 printf("raid%d: WARNING: %s: "
2382 "total sector size in disklabel (%" PRIu32 ") != "
2383 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2384 lp->d_secperunit, rs->sc_size);
2385 for (i = 0; i < lp->d_npartitions; i++) {
2386 pp = &lp->d_partitions[i];
2387 if (pp->p_offset + pp->p_size > rs->sc_size)
2388 printf("raid%d: WARNING: %s: end of partition `%c' "
2389 "exceeds the size of raid (%" PRIu64 ")\n",
2390 unit, rs->sc_xname, 'a' + i, rs->sc_size);
2391 }
2392 }
2393
2394 }
2395 /*
2396 * Take care of things one might want to take care of in the event
2397 * that a disklabel isn't present.
2398 */
2399 static void
2400 raidmakedisklabel(struct raid_softc *rs)
2401 {
2402 struct disklabel *lp = rs->sc_dkdev.dk_label;
2403 db1_printf(("Making a label..\n"));
2404
2405 /*
2406 * For historical reasons, if there's no disklabel present
2407 * the raw partition must be marked FS_BSDFFS.
2408 */
2409
2410 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2411
2412 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2413
2414 lp->d_checksum = dkcksum(lp);
2415 }
2416 /*
2417 * Wait interruptibly for an exclusive lock.
2418 *
2419 * XXX
2420 * Several drivers do this; it should be abstracted and made MP-safe.
2421 * (Hmm... where have we seen this warning before :-> GO )
2422 */
2423 static int
2424 raidlock(struct raid_softc *rs)
2425 {
2426 int error;
2427
2428 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2429 rs->sc_flags |= RAIDF_WANTED;
2430 if ((error =
2431 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2432 return (error);
2433 }
2434 rs->sc_flags |= RAIDF_LOCKED;
2435 return (0);
2436 }
2437 /*
2438 * Unlock and wake up any waiters.
2439 */
2440 static void
2441 raidunlock(struct raid_softc *rs)
2442 {
2443
2444 rs->sc_flags &= ~RAIDF_LOCKED;
2445 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2446 rs->sc_flags &= ~RAIDF_WANTED;
2447 wakeup(rs);
2448 }
2449 }
2450
2451
/* On-disk layout of the reserved metadata area at the front of each
   component: the component label sits at a fixed byte offset, and the
   parity map follows it (see rf_parity_map_offset()). */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2455
2456 static daddr_t
2457 rf_component_info_offset(void)
2458 {
2459
2460 return RF_COMPONENT_INFO_OFFSET;
2461 }
2462
2463 static daddr_t
2464 rf_component_info_size(unsigned secsize)
2465 {
2466 daddr_t info_size;
2467
2468 KASSERT(secsize);
2469 if (secsize > RF_COMPONENT_INFO_SIZE)
2470 info_size = secsize;
2471 else
2472 info_size = RF_COMPONENT_INFO_SIZE;
2473
2474 return info_size;
2475 }
2476
2477 static daddr_t
2478 rf_parity_map_offset(RF_Raid_t *raidPtr)
2479 {
2480 daddr_t map_offset;
2481
2482 KASSERT(raidPtr->bytesPerSector);
2483 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2484 map_offset = raidPtr->bytesPerSector;
2485 else
2486 map_offset = RF_COMPONENT_INFO_SIZE;
2487 map_offset += rf_component_info_offset();
2488
2489 return map_offset;
2490 }
2491
2492 static daddr_t
2493 rf_parity_map_size(RF_Raid_t *raidPtr)
2494 {
2495 daddr_t map_size;
2496
2497 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2498 map_size = raidPtr->bytesPerSector;
2499 else
2500 map_size = RF_PARITY_MAP_SIZE;
2501
2502 return map_size;
2503 }
2504
2505 int
2506 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2507 {
2508 RF_ComponentLabel_t *clabel;
2509
2510 clabel = raidget_component_label(raidPtr, col);
2511 clabel->clean = RF_RAID_CLEAN;
2512 raidflush_component_label(raidPtr, col);
2513 return(0);
2514 }
2515
2516
2517 int
2518 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2519 {
2520 RF_ComponentLabel_t *clabel;
2521
2522 clabel = raidget_component_label(raidPtr, col);
2523 clabel->clean = RF_RAID_DIRTY;
2524 raidflush_component_label(raidPtr, col);
2525 return(0);
2526 }
2527
2528 int
2529 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2530 {
2531 KASSERT(raidPtr->bytesPerSector);
2532 return raidread_component_label(raidPtr->bytesPerSector,
2533 raidPtr->Disks[col].dev,
2534 raidPtr->raid_cinfo[col].ci_vp,
2535 &raidPtr->raid_cinfo[col].ci_label);
2536 }
2537
2538 RF_ComponentLabel_t *
2539 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2540 {
2541 return &raidPtr->raid_cinfo[col].ci_label;
2542 }
2543
2544 int
2545 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2546 {
2547 RF_ComponentLabel_t *label;
2548
2549 label = &raidPtr->raid_cinfo[col].ci_label;
2550 label->mod_counter = raidPtr->mod_counter;
2551 #ifndef RF_NO_PARITY_MAP
2552 label->parity_map_modcount = label->mod_counter;
2553 #endif
2554 return raidwrite_component_label(raidPtr->bytesPerSector,
2555 raidPtr->Disks[col].dev,
2556 raidPtr->raid_cinfo[col].ci_vp, label);
2557 }
2558
2559
2560 static int
2561 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2562 RF_ComponentLabel_t *clabel)
2563 {
2564 return raidread_component_area(dev, b_vp, clabel,
2565 sizeof(RF_ComponentLabel_t),
2566 rf_component_info_offset(),
2567 rf_component_info_size(secsize));
2568 }
2569
2570 /* ARGSUSED */
2571 static int
2572 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2573 size_t msize, daddr_t offset, daddr_t dsize)
2574 {
2575 struct buf *bp;
2576 const struct bdevsw *bdev;
2577 int error;
2578
2579 /* XXX should probably ensure that we don't try to do this if
2580 someone has changed rf_protected_sectors. */
2581
2582 if (b_vp == NULL) {
2583 /* For whatever reason, this component is not valid.
2584 Don't try to read a component label from it. */
2585 return(EINVAL);
2586 }
2587
2588 /* get a block of the appropriate size... */
2589 bp = geteblk((int)dsize);
2590 bp->b_dev = dev;
2591
2592 /* get our ducks in a row for the read */
2593 bp->b_blkno = offset / DEV_BSIZE;
2594 bp->b_bcount = dsize;
2595 bp->b_flags |= B_READ;
2596 bp->b_resid = dsize;
2597
2598 bdev = bdevsw_lookup(bp->b_dev);
2599 if (bdev == NULL)
2600 return (ENXIO);
2601 (*bdev->d_strategy)(bp);
2602
2603 error = biowait(bp);
2604
2605 if (!error) {
2606 memcpy(data, bp->b_data, msize);
2607 }
2608
2609 brelse(bp, 0);
2610 return(error);
2611 }
2612
2613
2614 static int
2615 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2616 RF_ComponentLabel_t *clabel)
2617 {
2618 return raidwrite_component_area(dev, b_vp, clabel,
2619 sizeof(RF_ComponentLabel_t),
2620 rf_component_info_offset(),
2621 rf_component_info_size(secsize), 0);
2622 }
2623
2624 /* ARGSUSED */
2625 static int
2626 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2627 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2628 {
2629 struct buf *bp;
2630 const struct bdevsw *bdev;
2631 int error;
2632
2633 /* get a block of the appropriate size... */
2634 bp = geteblk((int)dsize);
2635 bp->b_dev = dev;
2636
2637 /* get our ducks in a row for the write */
2638 bp->b_blkno = offset / DEV_BSIZE;
2639 bp->b_bcount = dsize;
2640 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2641 bp->b_resid = dsize;
2642
2643 memset(bp->b_data, 0, dsize);
2644 memcpy(bp->b_data, data, msize);
2645
2646 bdev = bdevsw_lookup(bp->b_dev);
2647 if (bdev == NULL)
2648 return (ENXIO);
2649 (*bdev->d_strategy)(bp);
2650 if (asyncp)
2651 return 0;
2652 error = biowait(bp);
2653 brelse(bp, 0);
2654 if (error) {
2655 #if 1
2656 printf("Failed to write RAID component info!\n");
2657 #endif
2658 }
2659
2660 return(error);
2661 }
2662
2663 void
2664 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2665 {
2666 int c;
2667
2668 for (c = 0; c < raidPtr->numCol; c++) {
2669 /* Skip dead disks. */
2670 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2671 continue;
2672 /* XXXjld: what if an error occurs here? */
2673 raidwrite_component_area(raidPtr->Disks[c].dev,
2674 raidPtr->raid_cinfo[c].ci_vp, map,
2675 RF_PARITYMAP_NBYTE,
2676 rf_parity_map_offset(raidPtr),
2677 rf_parity_map_size(raidPtr), 0);
2678 }
2679 }
2680
2681 void
2682 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2683 {
2684 struct rf_paritymap_ondisk tmp;
2685 int c,first;
2686
2687 first=1;
2688 for (c = 0; c < raidPtr->numCol; c++) {
2689 /* Skip dead disks. */
2690 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2691 continue;
2692 raidread_component_area(raidPtr->Disks[c].dev,
2693 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2694 RF_PARITYMAP_NBYTE,
2695 rf_parity_map_offset(raidPtr),
2696 rf_parity_map_size(raidPtr));
2697 if (first) {
2698 memcpy(map, &tmp, sizeof(*map));
2699 first = 0;
2700 } else {
2701 rf_paritymap_merge(map, &tmp);
2702 }
2703 }
2704 }
2705
/*
 * Mark every live component (and every in-use spare) dirty, bumping the
 * set's mod counter first so the freshly-written labels supersede any
 * older copies.  Called when the set goes "in use".
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2765
2766
/*
 * Rewrite the component labels of every optimal component and every in-use
 * spare with the current mod counter and unit number.  If 'final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark the
 * components clean (shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2841
2842 void
2843 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2844 {
2845
2846 if (vp != NULL) {
2847 if (auto_configured == 1) {
2848 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2849 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2850 vput(vp);
2851
2852 } else {
2853 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2854 }
2855 }
2856 }
2857
2858
2859 void
2860 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2861 {
2862 int r,c;
2863 struct vnode *vp;
2864 int acd;
2865
2866
2867 /* We take this opportunity to close the vnodes like we should.. */
2868
2869 for (c = 0; c < raidPtr->numCol; c++) {
2870 vp = raidPtr->raid_cinfo[c].ci_vp;
2871 acd = raidPtr->Disks[c].auto_configured;
2872 rf_close_component(raidPtr, vp, acd);
2873 raidPtr->raid_cinfo[c].ci_vp = NULL;
2874 raidPtr->Disks[c].auto_configured = 0;
2875 }
2876
2877 for (r = 0; r < raidPtr->numSpare; r++) {
2878 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2879 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2880 rf_close_component(raidPtr, vp, acd);
2881 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2882 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2883 }
2884 }
2885
2886
2887 void
2888 rf_ReconThread(struct rf_recon_req *req)
2889 {
2890 int s;
2891 RF_Raid_t *raidPtr;
2892
2893 s = splbio();
2894 raidPtr = (RF_Raid_t *) req->raidPtr;
2895 raidPtr->recon_in_progress = 1;
2896
2897 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2898 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2899
2900 RF_Free(req, sizeof(*req));
2901
2902 raidPtr->recon_in_progress = 0;
2903 splx(s);
2904
2905 /* That's all... */
2906 kthread_exit(0); /* does not return */
2907 }
2908
2909 void
2910 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2911 {
2912 int retcode;
2913 int s;
2914
2915 raidPtr->parity_rewrite_stripes_done = 0;
2916 raidPtr->parity_rewrite_in_progress = 1;
2917 s = splbio();
2918 retcode = rf_RewriteParity(raidPtr);
2919 splx(s);
2920 if (retcode) {
2921 printf("raid%d: Error re-writing parity (%d)!\n",
2922 raidPtr->raidid, retcode);
2923 } else {
2924 /* set the clean bit! If we shutdown correctly,
2925 the clean bit on each component label will get
2926 set */
2927 raidPtr->parity_good = RF_RAID_CLEAN;
2928 }
2929 raidPtr->parity_rewrite_in_progress = 0;
2930
2931 /* Anyone waiting for us to stop? If so, inform them... */
2932 if (raidPtr->waitShutdown) {
2933 wakeup(&raidPtr->parity_rewrite_in_progress);
2934 }
2935
2936 /* That's all... */
2937 kthread_exit(0); /* does not return */
2938 }
2939
2940
2941 void
2942 rf_CopybackThread(RF_Raid_t *raidPtr)
2943 {
2944 int s;
2945
2946 raidPtr->copyback_in_progress = 1;
2947 s = splbio();
2948 rf_CopybackReconstructedData(raidPtr);
2949 splx(s);
2950 raidPtr->copyback_in_progress = 0;
2951
2952 /* That's all... */
2953 kthread_exit(0); /* does not return */
2954 }
2955
2956
2957 void
2958 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2959 {
2960 int s;
2961 RF_Raid_t *raidPtr;
2962
2963 s = splbio();
2964 raidPtr = req->raidPtr;
2965 raidPtr->recon_in_progress = 1;
2966 rf_ReconstructInPlace(raidPtr, req->col);
2967 RF_Free(req, sizeof(*req));
2968 raidPtr->recon_in_progress = 0;
2969 splx(s);
2970
2971 /* That's all... */
2972 kthread_exit(0); /* does not return */
2973 }
2974
/*
 * Read and sanity-check the component label on (dev, vp).  If it looks
 * reasonable, prepend a new RF_AutoConfig_t for it to ac_list and return
 * the new list head.  If the label is bogus, the vnode is closed and
 * released.  On memory exhaustion the entire list built so far is freed
 * and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* prepend to the list; ownership of vp and clabel
			   passes to the list entry */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label rejected, so drop our references */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3032
/*
 * Scan every disk-class device on the system looking for RAIDframe
 * component labels: wedges of type RAIDFRAME, FS_RAID disklabel
 * partitions, and (failing both) the raw partition.  Returns a list of
 * RF_AutoConfig_t candidates (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their type in the wedge info,
			   not in a disklabel */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes over vp on success or
			   closes it on rejection */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3214
3215
3216 int
3217 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3218 {
3219
3220 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3221 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3222 ((clabel->clean == RF_RAID_CLEAN) ||
3223 (clabel->clean == RF_RAID_DIRTY)) &&
3224 clabel->row >=0 &&
3225 clabel->column >= 0 &&
3226 clabel->num_rows > 0 &&
3227 clabel->num_columns > 0 &&
3228 clabel->row < clabel->num_rows &&
3229 clabel->column < clabel->num_columns &&
3230 clabel->blockSize > 0 &&
3231 /*
3232 * numBlocksHi may contain garbage, but it is ok since
3233 * the type is unsigned. If it is really garbage,
3234 * rf_fix_old_label_size() will fix it.
3235 */
3236 rf_component_label_numblocks(clabel) > 0) {
3237 /*
3238 * label looks reasonable enough...
3239 * let's make sure it has no old garbage.
3240 */
3241 if (numsecs)
3242 rf_fix_old_label_size(clabel, numsecs);
3243 return(1);
3244 }
3245 return(0);
3246 }
3247
3248
3249 /*
3250 * For reasons yet unknown, some old component labels have garbage in
3251 * the newer numBlocksHi region, and this causes lossage. Since those
3252 * disks will also have numsecs set to less than 32 bits of sectors,
3253 * we can determine when this corruption has occured, and fix it.
3254 *
3255 * The exact same problem, with the same unknown reason, happens to
3256 * the partitionSizeHi member as well.
3257 */
3258 static void
3259 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3260 {
3261
3262 if (numsecs < ((uint64_t)1 << 32)) {
3263 if (clabel->numBlocksHi) {
3264 printf("WARNING: total sectors < 32 bits, yet "
3265 "numBlocksHi set\n"
3266 "WARNING: resetting numBlocksHi to zero.\n");
3267 clabel->numBlocksHi = 0;
3268 }
3269
3270 if (clabel->partitionSizeHi) {
3271 printf("WARNING: total sectors < 32 bits, yet "
3272 "partitionSizeHi set\n"
3273 "WARNING: resetting partitionSizeHi to zero.\n");
3274 clabel->partitionSizeHi = 0;
3275 }
3276 }
3277 }
3278
3279
#ifdef DEBUG
/* Dump the interesting fields of a component label to the console
   (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	/* combine numBlocks/numBlocksHi into one 64-bit count */
	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3310
/*
 * Partition the flat list of discovered components into config sets:
 * each RF_ConfigSet_t collects the components that (per rf_does_it_fit())
 * belong to the same RAID set.  The ac_list entries are re-linked onto
 * the per-set lists, so the input list must not be used afterwards.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches...
					   prepend to this set's list */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
3376
3377 static int
3378 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3379 {
3380 RF_ComponentLabel_t *clabel1, *clabel2;
3381
3382 /* If this one matches the *first* one in the set, that's good
3383 enough, since the other members of the set would have been
3384 through here too... */
3385 /* note that we are not checking partitionSize here..
3386
3387 Note that we are also not checking the mod_counters here.
3388 If everything else matches execpt the mod_counter, that's
3389 good enough for this test. We will deal with the mod_counters
3390 a little later in the autoconfiguration process.
3391
3392 (clabel1->mod_counter == clabel2->mod_counter) &&
3393
3394 The reason we don't check for this is that failed disks
3395 will have lower modification counts. If those disks are
3396 not added to the set they used to belong to, then they will
3397 form their own set, which may result in 2 different sets,
3398 for example, competing to be configured at raid0, and
3399 perhaps competing to be the root filesystem set. If the
3400 wrong ones get configured, or both attempt to become /,
3401 weird behaviour and or serious lossage will occur. Thus we
3402 need to bring them into the fold here, and kick them out at
3403 a later point.
3404
3405 */
3406
3407 clabel1 = cset->ac->clabel;
3408 clabel2 = ac->clabel;
3409 if ((clabel1->version == clabel2->version) &&
3410 (clabel1->serial_number == clabel2->serial_number) &&
3411 (clabel1->num_rows == clabel2->num_rows) &&
3412 (clabel1->num_columns == clabel2->num_columns) &&
3413 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3414 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3415 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3416 (clabel1->parityConfig == clabel2->parityConfig) &&
3417 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3418 (clabel1->blockSize == clabel2->blockSize) &&
3419 rf_component_label_numblocks(clabel1) ==
3420 rf_component_label_numblocks(clabel2) &&
3421 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3422 (clabel1->root_partition == clabel2->root_partition) &&
3423 (clabel1->last_unit == clabel2->last_unit) &&
3424 (clabel1->config_order == clabel2->config_order)) {
3425 /* if it get's here, it almost *has* to be a match */
3426 } else {
3427 /* it's not consistent with somebody in the set..
3428 punt */
3429 return(0);
3430 }
3431 /* all was fine.. it must fit... */
3432 return(1);
3433 }
3434
/*
 * Decide whether a configuration set has enough 'live' components to
 * be configured.  Returns 1 if the set can be brought up, 0 if too
 * many components are missing or stale.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the highest mod_counter seen among the members is taken as
	   authoritative; components with a lower one are stale. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each expected column, look for a component carrying the
	   authoritative mod_counter; anything not found is missing */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a RAID-1
			   pair without bailing: reset the
			   even_pair_failed flag for the next pair. */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3537
3538 void
3539 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3540 RF_Raid_t *raidPtr)
3541 {
3542 RF_ComponentLabel_t *clabel;
3543 int i;
3544
3545 clabel = ac->clabel;
3546
3547 /* 1. Fill in the common stuff */
3548 config->numRow = clabel->num_rows = 1;
3549 config->numCol = clabel->num_columns;
3550 config->numSpare = 0; /* XXX should this be set here? */
3551 config->sectPerSU = clabel->sectPerSU;
3552 config->SUsPerPU = clabel->SUsPerPU;
3553 config->SUsPerRU = clabel->SUsPerRU;
3554 config->parityConfig = clabel->parityConfig;
3555 /* XXX... */
3556 strcpy(config->diskQueueType,"fifo");
3557 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3558 config->layoutSpecificSize = 0; /* XXX ?? */
3559
3560 while(ac!=NULL) {
3561 /* row/col values will be in range due to the checks
3562 in reasonable_label() */
3563 strcpy(config->devnames[0][ac->clabel->column],
3564 ac->devname);
3565 ac = ac->next;
3566 }
3567
3568 for(i=0;i<RF_MAXDBGV;i++) {
3569 config->debugVars[i][0] = 0;
3570 }
3571 }
3572
3573 int
3574 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3575 {
3576 RF_ComponentLabel_t *clabel;
3577 int column;
3578 int sparecol;
3579
3580 raidPtr->autoconfigure = new_value;
3581
3582 for(column=0; column<raidPtr->numCol; column++) {
3583 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3584 clabel = raidget_component_label(raidPtr, column);
3585 clabel->autoconfigure = new_value;
3586 raidflush_component_label(raidPtr, column);
3587 }
3588 }
3589 for(column = 0; column < raidPtr->numSpare ; column++) {
3590 sparecol = raidPtr->numCol + column;
3591 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3592 clabel = raidget_component_label(raidPtr, sparecol);
3593 clabel->autoconfigure = new_value;
3594 raidflush_component_label(raidPtr, sparecol);
3595 }
3596 }
3597 return(new_value);
3598 }
3599
3600 int
3601 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3602 {
3603 RF_ComponentLabel_t *clabel;
3604 int column;
3605 int sparecol;
3606
3607 raidPtr->root_partition = new_value;
3608 for(column=0; column<raidPtr->numCol; column++) {
3609 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3610 clabel = raidget_component_label(raidPtr, column);
3611 clabel->root_partition = new_value;
3612 raidflush_component_label(raidPtr, column);
3613 }
3614 }
3615 for(column = 0; column < raidPtr->numSpare ; column++) {
3616 sparecol = raidPtr->numCol + column;
3617 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3618 clabel = raidget_component_label(raidPtr, sparecol);
3619 clabel->root_partition = new_value;
3620 raidflush_component_label(raidPtr, sparecol);
3621 }
3622 }
3623 return(new_value);
3624 }
3625
3626 void
3627 rf_release_all_vps(RF_ConfigSet_t *cset)
3628 {
3629 RF_AutoConfig_t *ac;
3630
3631 ac = cset->ac;
3632 while(ac!=NULL) {
3633 /* Close the vp, and give it back */
3634 if (ac->vp) {
3635 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3636 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3637 vput(ac->vp);
3638 ac->vp = NULL;
3639 }
3640 ac = ac->next;
3641 }
3642 }
3643
3644
3645 void
3646 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3647 {
3648 RF_AutoConfig_t *ac;
3649 RF_AutoConfig_t *next_ac;
3650
3651 ac = cset->ac;
3652 while(ac!=NULL) {
3653 next_ac = ac->next;
3654 /* nuke the label */
3655 free(ac->clabel, M_RAIDFRAME);
3656 /* cleanup the config structure */
3657 free(ac, M_RAIDFRAME);
3658 /* "next.." */
3659 ac = next_ac;
3660 }
3661 /* and, finally, nuke the config set */
3662 free(cset, M_RAIDFRAME);
3663 }
3664
3665
/*
 * Initialize the set-wide fields of a component label from the current
 * state of the RAID set.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* stores the 64-bit sector count into the split label fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3698
/*
 * Bring up one autodetected RAID set.  Chooses a raid unit (preferring
 * the one recorded in the component labels as last_unit), builds a
 * config structure, and runs the normal configuration path on it.
 *
 * On success *unit holds the raid unit used and 0 is returned; on
 * failure a non-zero value is returned (*unit stays -1 if no unit
 * could even be chosen).
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
		/* NB: raidID is -1 here if every unit was in use;
		   caught by the check below. */
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3799
3800 void
3801 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3802 {
3803 struct buf *bp;
3804
3805 bp = (struct buf *)desc->bp;
3806 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3807 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3808 }
3809
/*
 * Initialize a pool for RAIDframe structures: pre-allocate xmin items
 * and keep the pool between xmin (low water) and xmax (high water).
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3819
3820 /*
3821 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3822 * if there is IO pending and if that IO could possibly be done for a
3823 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3824 * otherwise.
3825 *
3826 */
3827
3828 int
3829 rf_buf_queue_check(int raidid)
3830 {
3831 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3832 raidPtrs[raidid]->openings > 0) {
3833 /* there is work to do */
3834 return 0;
3835 }
3836 /* default is nothing to do */
3837 return 1;
3838 }
3839
3840 int
3841 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3842 {
3843 uint64_t numsecs;
3844 unsigned secsize;
3845 int error;
3846
3847 error = getdisksize(vp, &numsecs, &secsize);
3848 if (error == 0) {
3849 diskPtr->blockSize = secsize;
3850 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3851 diskPtr->partitionSize = numsecs;
3852 return 0;
3853 }
3854 return error;
3855 }
3856
/*
 * Autoconf match routine: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3862
/*
 * Autoconf attach routine: intentionally a no-op; the device is set
 * up later when a set is configured (cf. raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3868
3869
3870 static int
3871 raid_detach(device_t self, int flags)
3872 {
3873 int error;
3874 struct raid_softc *rs = &raid_softc[device_unit(self)];
3875
3876 if ((error = raidlock(rs)) != 0)
3877 return (error);
3878
3879 error = raid_detach_unlocked(rs);
3880
3881 raidunlock(rs);
3882
3883 return error;
3884 }
3885
/*
 * Attach a synthesized disk geometry to the raid device's properties
 * dictionary.  The geometry is faked: one "track" per data stripe and
 * 4*numCol "tracks" per cylinder.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* drop our reference to geom once disk_info holds it */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new disk-info and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3915
3916 /*
3917 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3918 * We end up returning whatever error was returned by the first cache flush
3919 * that fails.
3920 */
3921
3922 int
3923 rf_sync_component_caches(RF_Raid_t *raidPtr)
3924 {
3925 int c, sparecol;
3926 int e,error;
3927 int force = 1;
3928
3929 error = 0;
3930 for (c = 0; c < raidPtr->numCol; c++) {
3931 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3932 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3933 &force, FWRITE, NOCRED);
3934 if (e) {
3935 if (e != ENODEV)
3936 printf("raid%d: cache flush to component %s failed.\n",
3937 raidPtr->raidid, raidPtr->Disks[c].devname);
3938 if (error == 0) {
3939 error = e;
3940 }
3941 }
3942 }
3943 }
3944
3945 for( c = 0; c < raidPtr->numSpare ; c++) {
3946 sparecol = raidPtr->numCol + c;
3947 /* Need to ensure that the reconstruct actually completed! */
3948 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3949 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3950 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3951 if (e) {
3952 if (e != ENODEV)
3953 printf("raid%d: cache flush to component %s failed.\n",
3954 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3955 if (error == 0) {
3956 error = e;
3957 }
3958 }
3959 }
3960 }
3961 return error;
3962 }
3963