rf_netbsdkintf.c revision 1.250.4.13 1 /* $NetBSD: rf_netbsdkintf.c,v 1.250.4.13 2012/10/24 03:03:53 riz Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * Copyright (c) 1990, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * This code is derived from software contributed to Berkeley by
36 * the Systems Programming Group of the University of Utah Computer
37 * Science Department.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * from: Utah $Hdr: cd.c 1.6 90/11/28$
64 *
65 * @(#)cd.c 8.2 (Berkeley) 11/16/93
66 */
67
68 /*
69 * Copyright (c) 1988 University of Utah.
70 *
71 * This code is derived from software contributed to Berkeley by
72 * the Systems Programming Group of the University of Utah Computer
73 * Science Department.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 * 1. Redistributions of source code must retain the above copyright
79 * notice, this list of conditions and the following disclaimer.
80 * 2. Redistributions in binary form must reproduce the above copyright
81 * notice, this list of conditions and the following disclaimer in the
82 * documentation and/or other materials provided with the distribution.
83 * 3. All advertising materials mentioning features or use of this software
84 * must display the following acknowledgement:
85 * This product includes software developed by the University of
86 * California, Berkeley and its contributors.
87 * 4. Neither the name of the University nor the names of its contributors
88 * may be used to endorse or promote products derived from this software
89 * without specific prior written permission.
90 *
91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
101 * SUCH DAMAGE.
102 *
103 * from: Utah $Hdr: cd.c 1.6 90/11/28$
104 *
105 * @(#)cd.c 8.2 (Berkeley) 11/16/93
106 */
107
108 /*
109 * Copyright (c) 1995 Carnegie-Mellon University.
110 * All rights reserved.
111 *
112 * Authors: Mark Holland, Jim Zelenka
113 *
114 * Permission to use, copy, modify and distribute this software and
115 * its documentation is hereby granted, provided that both the copyright
116 * notice and this permission notice appear in all copies of the
117 * software, derivative works or modified versions, and any portions
118 * thereof, and that both notices appear in supporting documentation.
119 *
120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
123 *
124 * Carnegie Mellon requests users of this software to return to
125 *
126 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
127 * School of Computer Science
128 * Carnegie Mellon University
129 * Pittsburgh PA 15213-3890
130 *
131 * any improvements or extensions that they make and grant Carnegie the
132 * rights to redistribute these changes.
133 */
134
135 /***********************************************************
136 *
137 * rf_kintf.c -- the kernel interface routines for RAIDframe
138 *
139 ***********************************************************/
140
141 #include <sys/cdefs.h>
142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.250.4.13 2012/10/24 03:03:53 riz Exp $");
143
144 #include <sys/param.h>
145 #include <sys/errno.h>
146 #include <sys/pool.h>
147 #include <sys/proc.h>
148 #include <sys/queue.h>
149 #include <sys/disk.h>
150 #include <sys/device.h>
151 #include <sys/stat.h>
152 #include <sys/ioctl.h>
153 #include <sys/fcntl.h>
154 #include <sys/systm.h>
155 #include <sys/vnode.h>
156 #include <sys/disklabel.h>
157 #include <sys/conf.h>
158 #include <sys/buf.h>
159 #include <sys/bufq.h>
160 #include <sys/user.h>
161 #include <sys/reboot.h>
162 #include <sys/kauth.h>
163
164 #include <prop/proplib.h>
165
166 #include <dev/raidframe/raidframevar.h>
167 #include <dev/raidframe/raidframeio.h>
168 #include <dev/raidframe/rf_paritymap.h>
169 #include "raid.h"
170 #include "opt_raid_autoconfig.h"
171 #include "rf_raid.h"
172 #include "rf_copyback.h"
173 #include "rf_dag.h"
174 #include "rf_dagflags.h"
175 #include "rf_desc.h"
176 #include "rf_diskqueue.h"
177 #include "rf_etimer.h"
178 #include "rf_general.h"
179 #include "rf_kintf.h"
180 #include "rf_options.h"
181 #include "rf_driver.h"
182 #include "rf_parityscan.h"
183 #include "rf_threadstuff.h"
184
185 #ifdef DEBUG
186 int rf_kdebug_level = 0;
187 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
188 #else /* DEBUG */
189 #define db1_printf(a) { }
190 #endif /* DEBUG */
191
192 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
193
194 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
195 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
196
197 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
198 * spare table */
199 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
200 * installation process */
201 #endif
202
203 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
204
205 /* prototypes */
206 static void KernelWakeupFunc(struct buf *);
207 static void InitBP(struct buf *, struct vnode *, unsigned,
208 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
209 void *, int, struct proc *);
210 static void raidinit(RF_Raid_t *);
211
212 void raidattach(int);
213 static int raid_match(struct device *, struct cfdata *, void *);
214 static void raid_attach(struct device *, struct device *, void *);
215 static int raid_detach(struct device *, int);
216
217 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
218 daddr_t, daddr_t);
219 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
220 daddr_t, daddr_t, int);
221
222 static int raidwrite_component_label(unsigned,
223 dev_t, struct vnode *, RF_ComponentLabel_t *);
224 static int raidread_component_label(unsigned,
225 dev_t, struct vnode *, RF_ComponentLabel_t *);
226
227
228 dev_type_open(raidopen);
229 dev_type_close(raidclose);
230 dev_type_read(raidread);
231 dev_type_write(raidwrite);
232 dev_type_ioctl(raidioctl);
233 dev_type_strategy(raidstrategy);
234 dev_type_dump(raiddump);
235 dev_type_size(raidsize);
236
/* Block device switch: raid presents itself as a standard disk (D_DISK). */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* Character device switch: stop/tty/poll/mmap/kqfilter are unsupported. */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

/* Disk driver glue handed to the generic disk(9) layer. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that...
251 */
252
/* Per-unit driver state; one entry per configured RAID unit. */
struct raid_softc {
	struct device *sc_dev;		/* autoconf device handle */
	int     sc_flags;		/* flags (RAIDF_* below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
272 extern struct cfdriver raid_cd;
273 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
274 raid_match, raid_attach, raid_detach, NULL);
275
276 /*
277 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
278 * Be aware that large numbers can allow the driver to consume a lot of
279 * kernel memory, especially on writes, and in degraded mode reads.
280 *
281 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
282 * a single 64K write will typically require 64K for the old data,
283 * 64K for the old parity, and 64K for the new parity, for a total
284 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
286 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
287 *
288 * Now in degraded mode, for example, a 64K read on the above setup may
289 * require data reconstruction, which will require *all* of the 4 remaining
290 * disks to participate -- 4 * 32K/disk == 128K again.
291 */
292
293 #ifndef RAIDOUTSTANDING
294 #define RAIDOUTSTANDING 6
295 #endif
296
297 #define RAIDLABELDEV(dev) \
298 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
299
300 /* declared here, and made public, for the benefit of KVM stuff.. */
301 struct raid_softc *raid_softc;
302
303 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
304 struct disklabel *);
305 static void raidgetdisklabel(dev_t);
306 static void raidmakedisklabel(struct raid_softc *);
307
308 static int raidlock(struct raid_softc *);
309 static void raidunlock(struct raid_softc *);
310
311 static void rf_markalldirty(RF_Raid_t *);
312 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
313
314 void rf_ReconThread(struct rf_recon_req *);
315 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
316 void rf_CopybackThread(RF_Raid_t *raidPtr);
317 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
318 int rf_autoconfig(struct device *self);
319 void rf_buildroothack(RF_ConfigSet_t *);
320
321 RF_AutoConfig_t *rf_find_raid_components(void);
322 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
323 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
324 static int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
325 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
326 int rf_set_autoconfig(RF_Raid_t *, int);
327 int rf_set_rootpartition(RF_Raid_t *, int);
328 void rf_release_all_vps(RF_ConfigSet_t *);
329 void rf_cleanup_config_set(RF_ConfigSet_t *);
330 int rf_have_enough_components(RF_ConfigSet_t *);
331 int rf_auto_config_set(RF_ConfigSet_t *, int *);
332 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
333
334 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
335 allow autoconfig to take place.
336 Note that this is overridden by having
337 RAID_AUTOCONFIG as an option in the
338 kernel config file. */
339
340 struct RF_Pools_s rf_pools;
341
/*
 * raidattach: pseudo-device attach routine, called once at boot with the
 * number of RAID units requested by the kernel configuration.
 *
 * Boots the RAIDframe core, allocates the global per-unit descriptor
 * array (raidPtrs[]) and softc array (raid_softc[]), attaches the
 * autoconf glue, and registers rf_autoconfig() as a finalizer so that
 * auto-configuration runs only after all real hardware has been found.
 * On allocation failure it panics or shrinks numraid rather than leave
 * partially-initialized units visible.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink the visible unit count so that later code
			   never touches the unallocated tail of raidPtrs[]. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
421
422 int
423 rf_autoconfig(struct device *self)
424 {
425 RF_AutoConfig_t *ac_list;
426 RF_ConfigSet_t *config_sets;
427
428 if (raidautoconfig == 0)
429 return (0);
430
431 /* XXX This code can only be run once. */
432 raidautoconfig = 0;
433
434 /* 1. locate all RAID components on the system */
435 aprint_debug("Searching for RAID components...\n");
436 ac_list = rf_find_raid_components();
437
438 /* 2. Sort them into their respective sets. */
439 config_sets = rf_create_auto_sets(ac_list);
440
441 /*
442 * 3. Evaluate each set andconfigure the valid ones.
443 * This gets done in rf_buildroothack().
444 */
445 rf_buildroothack(config_sets);
446
447 return 1;
448 }
449
/*
 * rf_buildroothack: walk the list of auto-configuration sets,
 * configuring every set that has enough components and is flagged for
 * autoconfigure.  If exactly one configured set is rootable, nominate
 * it as booted_device.  With several candidates, defer to the MD code
 * (cpu_rootconf) to identify the boot device, keep only the sets that
 * contain it, and if that is still ambiguous fall back to asking the
 * user via RB_ASKNAME.  Consumes (frees) every set in the list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help.  If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Recount, keeping only the rootable sets that actually
		   contain the device we booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the leading "/dev/" before comparing
				   against the boot device's xname */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
					     raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
544
545
546 int
547 raidsize(dev_t dev)
548 {
549 struct raid_softc *rs;
550 struct disklabel *lp;
551 int part, unit, omask, size;
552
553 unit = raidunit(dev);
554 if (unit >= numraid)
555 return (-1);
556 rs = &raid_softc[unit];
557
558 if ((rs->sc_flags & RAIDF_INITED) == 0)
559 return (-1);
560
561 part = DISKPART(dev);
562 omask = rs->sc_dkdev.dk_openmask & (1 << part);
563 lp = rs->sc_dkdev.dk_label;
564
565 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
566 return (-1);
567
568 if (lp->d_partitions[part].p_fstype != FS_SWAP)
569 size = -1;
570 else
571 size = lp->d_partitions[part].p_size *
572 (lp->d_secsize / DEV_BSIZE);
573
574 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
575 return (-1);
576
577 return (size);
578
579 }
580
/*
 * raiddump: d_dump entry point for crash dumps.
 *
 * Only RAID 1 sets (one data column, one parity column) are supported.
 * Selects a live component to receive the dump -- preferring the
 * master, then a spare standing in for the master, then the slave,
 * then a spare standing in for the slave -- and forwards the dump
 * request to that component's block device, adjusting blkno to be
 * relative to the underlying component partition.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps must be in whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * raidopen: d_open entry point (both block and character devices).
 *
 * Locks the unit, validates the requested partition against the
 * disklabel, records the open in the appropriate (char/block) open
 * mask so the unit cannot be unconfigured while in use, and on the
 * very first open of a configured set marks all components dirty.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* re-read the label on the first open of a configured unit */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: d_close entry point.
 *
 * Clears the partition's bit from the relevant open mask.  When the
 * last partition of a configured unit is closed, writes out final
 * component labels; if the system is shutting down, also shuts the
 * RAID set down and detaches the autoconf device and the disk.
 *
 * NOTE(review): errors from rf_Shutdown()/config_detach() are stored
 * in 'error' but this routine always returns 0 -- apparently a
 * deliberate "close cannot fail" policy; confirm before changing.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk. */
			dkwedge_delall(&rs->sc_dkdev);
			disk_detach(&rs->sc_dkdev);
			disk_destroy(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
864
865 void
866 raidstrategy(struct buf *bp)
867 {
868 int s;
869
870 unsigned int raidID = raidunit(bp->b_dev);
871 RF_Raid_t *raidPtr;
872 struct raid_softc *rs = &raid_softc[raidID];
873 int wlabel;
874
875 if ((rs->sc_flags & RAIDF_INITED) ==0) {
876 bp->b_error = ENXIO;
877 goto done;
878 }
879 if (raidID >= numraid || !raidPtrs[raidID]) {
880 bp->b_error = ENODEV;
881 goto done;
882 }
883 raidPtr = raidPtrs[raidID];
884 if (!raidPtr->valid) {
885 bp->b_error = ENODEV;
886 goto done;
887 }
888 if (bp->b_bcount == 0) {
889 db1_printf(("b_bcount is zero..\n"));
890 goto done;
891 }
892
893 /*
894 * Do bounds checking and adjust transfer. If there's an
895 * error, the bounds check will flag that for us.
896 */
897
898 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
899 if (DISKPART(bp->b_dev) == RAW_PART) {
900 uint64_t size; /* device size in DEV_BSIZE unit */
901
902 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
903 size = raidPtr->totalSectors <<
904 (raidPtr->logBytesPerSector - DEV_BSHIFT);
905 } else {
906 size = raidPtr->totalSectors >>
907 (DEV_BSHIFT - raidPtr->logBytesPerSector);
908 }
909 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
910 goto done;
911 }
912 } else {
913 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
914 db1_printf(("Bounds check failed!!:%d %d\n",
915 (int) bp->b_blkno, (int) wlabel));
916 goto done;
917 }
918 }
919 s = splbio();
920
921 bp->b_resid = 0;
922
923 /* stuff it onto our queue */
924 BUFQ_PUT(rs->buf_queue, bp);
925
926 /* scheduled the IO to happen at the next convenient time */
927 wakeup(&(raidPtrs[raidID]->iodone));
928
929 splx(s);
930 return;
931
932 done:
933 bp->b_resid = bp->b_bcount;
934 biodone(bp);
935 }
936 /* ARGSUSED */
937 int
938 raidread(dev_t dev, struct uio *uio, int flags)
939 {
940 int unit = raidunit(dev);
941 struct raid_softc *rs;
942
943 if (unit >= numraid)
944 return (ENXIO);
945 rs = &raid_softc[unit];
946
947 if ((rs->sc_flags & RAIDF_INITED) == 0)
948 return (ENXIO);
949
950 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
951
952 }
953 /* ARGSUSED */
954 int
955 raidwrite(dev_t dev, struct uio *uio, int flags)
956 {
957 int unit = raidunit(dev);
958 struct raid_softc *rs;
959
960 if (unit >= numraid)
961 return (ENXIO);
962 rs = &raid_softc[unit];
963
964 if ((rs->sc_flags & RAIDF_INITED) == 0)
965 return (ENXIO);
966
967 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
968
969 }
970
971 int
972 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
973 {
974 int unit = raidunit(dev);
975 int error = 0;
976 int part, pmask, s;
977 struct cfdata *cf;
978 struct raid_softc *rs;
979 RF_Config_t *k_cfg, *u_cfg;
980 RF_Raid_t *raidPtr;
981 RF_RaidDisk_t *diskPtr;
982 RF_AccTotals_t *totals;
983 RF_DeviceConfig_t *d_cfg, **ucfgp;
984 u_char *specific_buf;
985 int retcode = 0;
986 int column;
987 /* int raidid; */
988 struct rf_recon_req *rrcopy, *rr;
989 RF_ComponentLabel_t *clabel;
990 RF_ComponentLabel_t *ci_label;
991 RF_ComponentLabel_t **clabel_ptr;
992 RF_SingleComponent_t *sparePtr,*componentPtr;
993 RF_SingleComponent_t component;
994 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
995 int i, j, d;
996 #ifdef __HAVE_OLD_DISKLABEL
997 struct disklabel newlabel;
998 #endif
999 struct dkwedge_info *dkw;
1000
1001 if (unit >= numraid)
1002 return (ENXIO);
1003 rs = &raid_softc[unit];
1004 raidPtr = raidPtrs[unit];
1005
1006 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1007 (int) DISKPART(dev), (int) unit, cmd));
1008
1009 /* Must be open for writes for these commands... */
1010 switch (cmd) {
1011 #ifdef DIOCGSECTORSIZE
1012 case DIOCGSECTORSIZE:
1013 *(u_int *)data = raidPtr->bytesPerSector;
1014 return 0;
1015 case DIOCGMEDIASIZE:
1016 *(off_t *)data =
1017 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1018 return 0;
1019 #endif
1020 case DIOCSDINFO:
1021 case DIOCWDINFO:
1022 #ifdef __HAVE_OLD_DISKLABEL
1023 case ODIOCWDINFO:
1024 case ODIOCSDINFO:
1025 #endif
1026 case DIOCWLABEL:
1027 case DIOCAWEDGE:
1028 case DIOCDWEDGE:
1029 case DIOCSSTRATEGY:
1030 if ((flag & FWRITE) == 0)
1031 return (EBADF);
1032 }
1033
1034 /* Must be initialized for these... */
1035 switch (cmd) {
1036 case DIOCGDINFO:
1037 case DIOCSDINFO:
1038 case DIOCWDINFO:
1039 #ifdef __HAVE_OLD_DISKLABEL
1040 case ODIOCGDINFO:
1041 case ODIOCWDINFO:
1042 case ODIOCSDINFO:
1043 case ODIOCGDEFLABEL:
1044 #endif
1045 case DIOCGPART:
1046 case DIOCWLABEL:
1047 case DIOCGDEFLABEL:
1048 case DIOCAWEDGE:
1049 case DIOCDWEDGE:
1050 case DIOCLWEDGES:
1051 case DIOCCACHESYNC:
1052 case RAIDFRAME_SHUTDOWN:
1053 case RAIDFRAME_REWRITEPARITY:
1054 case RAIDFRAME_GET_INFO:
1055 case RAIDFRAME_RESET_ACCTOTALS:
1056 case RAIDFRAME_GET_ACCTOTALS:
1057 case RAIDFRAME_KEEP_ACCTOTALS:
1058 case RAIDFRAME_GET_SIZE:
1059 case RAIDFRAME_FAIL_DISK:
1060 case RAIDFRAME_COPYBACK:
1061 case RAIDFRAME_CHECK_RECON_STATUS:
1062 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1063 case RAIDFRAME_GET_COMPONENT_LABEL:
1064 case RAIDFRAME_SET_COMPONENT_LABEL:
1065 case RAIDFRAME_ADD_HOT_SPARE:
1066 case RAIDFRAME_REMOVE_HOT_SPARE:
1067 case RAIDFRAME_INIT_LABELS:
1068 case RAIDFRAME_REBUILD_IN_PLACE:
1069 case RAIDFRAME_CHECK_PARITY:
1070 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1071 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1072 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1073 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1074 case RAIDFRAME_SET_AUTOCONFIG:
1075 case RAIDFRAME_SET_ROOT:
1076 case RAIDFRAME_DELETE_COMPONENT:
1077 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1078 case RAIDFRAME_PARITYMAP_STATUS:
1079 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1080 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1081 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1082 case DIOCGSTRATEGY:
1083 case DIOCSSTRATEGY:
1084 if ((rs->sc_flags & RAIDF_INITED) == 0)
1085 return (ENXIO);
1086 }
1087
1088 switch (cmd) {
1089
1090 /* configure the system */
1091 case RAIDFRAME_CONFIGURE:
1092
1093 if (raidPtr->valid) {
1094 /* There is a valid RAID set running on this unit! */
1095 printf("raid%d: Device already configured!\n",unit);
1096 return(EINVAL);
1097 }
1098
1099 /* copy-in the configuration information */
1100 /* data points to a pointer to the configuration structure */
1101
1102 u_cfg = *((RF_Config_t **) data);
1103 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1104 if (k_cfg == NULL) {
1105 return (ENOMEM);
1106 }
1107 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1108 if (retcode) {
1109 RF_Free(k_cfg, sizeof(RF_Config_t));
1110 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1111 retcode));
1112 return (retcode);
1113 }
1114 /* allocate a buffer for the layout-specific data, and copy it
1115 * in */
1116 if (k_cfg->layoutSpecificSize) {
1117 if (k_cfg->layoutSpecificSize > 10000) {
1118 /* sanity check */
1119 RF_Free(k_cfg, sizeof(RF_Config_t));
1120 return (EINVAL);
1121 }
1122 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1123 (u_char *));
1124 if (specific_buf == NULL) {
1125 RF_Free(k_cfg, sizeof(RF_Config_t));
1126 return (ENOMEM);
1127 }
1128 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1129 k_cfg->layoutSpecificSize);
1130 if (retcode) {
1131 RF_Free(k_cfg, sizeof(RF_Config_t));
1132 RF_Free(specific_buf,
1133 k_cfg->layoutSpecificSize);
1134 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1135 retcode));
1136 return (retcode);
1137 }
1138 } else
1139 specific_buf = NULL;
1140 k_cfg->layoutSpecific = specific_buf;
1141
1142 /* should do some kind of sanity check on the configuration.
1143 * Store the sum of all the bytes in the last byte? */
1144
1145 /* configure the system */
1146
1147 /*
1148 * Clear the entire RAID descriptor, just to make sure
1149 * there is no stale data left in the case of a
1150 * reconfiguration
1151 */
1152 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1153 raidPtr->raidid = unit;
1154
1155 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1156
1157 if (retcode == 0) {
1158
1159 /* allow this many simultaneous IO's to
1160 this RAID device */
1161 raidPtr->openings = RAIDOUTSTANDING;
1162
1163 raidinit(raidPtr);
1164 rf_markalldirty(raidPtr);
1165 }
1166 /* free the buffers. No return code here. */
1167 if (k_cfg->layoutSpecificSize) {
1168 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1169 }
1170 RF_Free(k_cfg, sizeof(RF_Config_t));
1171
1172 return (retcode);
1173
1174 /* shutdown the system */
1175 case RAIDFRAME_SHUTDOWN:
1176
1177 if ((error = raidlock(rs)) != 0)
1178 return (error);
1179
1180 /*
1181 * If somebody has a partition mounted, we shouldn't
1182 * shutdown.
1183 */
1184
1185 part = DISKPART(dev);
1186 pmask = (1 << part);
1187 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1188 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1189 (rs->sc_dkdev.dk_copenmask & pmask))) {
1190 raidunlock(rs);
1191 return (EBUSY);
1192 }
1193
1194 retcode = rf_Shutdown(raidPtr);
1195
1196 /* It's no longer initialized... */
1197 rs->sc_flags &= ~RAIDF_INITED;
1198
1199 /* free the pseudo device attach bits */
1200
1201 cf = device_cfdata(rs->sc_dev);
1202 /* XXX this causes us to not return any errors
1203 from the above call to rf_Shutdown() */
1204 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1205 free(cf, M_RAIDFRAME);
1206
1207 /* Detach the disk. */
1208 dkwedge_delall(&rs->sc_dkdev);
1209 disk_detach(&rs->sc_dkdev);
1210 disk_destroy(&rs->sc_dkdev);
1211
1212 raidunlock(rs);
1213
1214 return (retcode);
1215 case RAIDFRAME_GET_COMPONENT_LABEL:
1216 clabel_ptr = (RF_ComponentLabel_t **) data;
1217 /* need to read the component label for the disk indicated
1218 by row,column in clabel */
1219
1220 /*
1221 * Perhaps there should be an option to skip the in-core
1222 * copy and hit the disk, as with disklabel(8).
1223 */
1224 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1225
1226 retcode = copyin( *clabel_ptr, clabel,
1227 sizeof(RF_ComponentLabel_t));
1228
1229 if (retcode) {
1230 return(retcode);
1231 }
1232
1233 clabel->row = 0; /* Don't allow looking at anything else.*/
1234
1235 column = clabel->column;
1236
1237 if ((column < 0) || (column >= raidPtr->numCol +
1238 raidPtr->numSpare)) {
1239 return(EINVAL);
1240 }
1241
1242 RF_Free(clabel, sizeof(*clabel));
1243
1244 clabel = raidget_component_label(raidPtr, column);
1245
1246 if (retcode == 0) {
1247 retcode = copyout(clabel, *clabel_ptr,
1248 sizeof(RF_ComponentLabel_t));
1249 }
1250 return (retcode);
1251
1252 #if 0
1253 case RAIDFRAME_SET_COMPONENT_LABEL:
1254 clabel = (RF_ComponentLabel_t *) data;
1255
1256 /* XXX check the label for valid stuff... */
1257 /* Note that some things *should not* get modified --
1258 the user should be re-initing the labels instead of
1259 trying to patch things.
1260 */
1261
1262 raidid = raidPtr->raidid;
1263 #ifdef DEBUG
1264 printf("raid%d: Got component label:\n", raidid);
1265 printf("raid%d: Version: %d\n", raidid, clabel->version);
1266 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1267 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1268 printf("raid%d: Column: %d\n", raidid, clabel->column);
1269 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1270 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1271 printf("raid%d: Status: %d\n", raidid, clabel->status);
1272 #endif
1273 clabel->row = 0;
1274 column = clabel->column;
1275
1276 if ((column < 0) || (column >= raidPtr->numCol)) {
1277 return(EINVAL);
1278 }
1279
1280 /* XXX this isn't allowed to do anything for now :-) */
1281
1282 /* XXX and before it is, we need to fill in the rest
1283 of the fields!?!?!?! */
1284 memcpy(raidget_component_label(raidPtr, column),
1285 clabel, sizeof(*clabel));
1286 raidflush_component_label(raidPtr, column);
1287 return (0);
1288 #endif
1289
1290 case RAIDFRAME_INIT_LABELS:
1291 clabel = (RF_ComponentLabel_t *) data;
1292 /*
1293 we only want the serial number from
1294 the above. We get all the rest of the information
1295 from the config that was used to create this RAID
1296 set.
1297 */
1298
1299 raidPtr->serial_number = clabel->serial_number;
1300
1301 for(column=0;column<raidPtr->numCol;column++) {
1302 diskPtr = &raidPtr->Disks[column];
1303 if (!RF_DEAD_DISK(diskPtr->status)) {
1304 ci_label = raidget_component_label(raidPtr,
1305 column);
1306 /* Zeroing this is important. */
1307 memset(ci_label, 0, sizeof(*ci_label));
1308 raid_init_component_label(raidPtr, ci_label);
1309 ci_label->serial_number =
1310 raidPtr->serial_number;
1311 ci_label->row = 0; /* we dont' pretend to support more */
1312 rf_component_label_set_partitionsize(ci_label,
1313 diskPtr->partitionSize);
1314 ci_label->column = column;
1315 raidflush_component_label(raidPtr, column);
1316 }
1317 /* XXXjld what about the spares? */
1318 }
1319
1320 return (retcode);
1321 case RAIDFRAME_SET_AUTOCONFIG:
1322 d = rf_set_autoconfig(raidPtr, *(int *) data);
1323 printf("raid%d: New autoconfig value is: %d\n",
1324 raidPtr->raidid, d);
1325 *(int *) data = d;
1326 return (retcode);
1327
1328 case RAIDFRAME_SET_ROOT:
1329 d = rf_set_rootpartition(raidPtr, *(int *) data);
1330 printf("raid%d: New rootpartition value is: %d\n",
1331 raidPtr->raidid, d);
1332 *(int *) data = d;
1333 return (retcode);
1334
1335 /* initialize all parity */
1336 case RAIDFRAME_REWRITEPARITY:
1337
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* Parity for RAID 0 is trivially correct */
1340 raidPtr->parity_good = RF_RAID_CLEAN;
1341 return(0);
1342 }
1343
1344 if (raidPtr->parity_rewrite_in_progress == 1) {
1345 /* Re-write is already in progress! */
1346 return(EINVAL);
1347 }
1348
1349 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1350 rf_RewriteParityThread,
1351 raidPtr,"raid_parity");
1352 return (retcode);
1353
1354
1355 case RAIDFRAME_ADD_HOT_SPARE:
1356 sparePtr = (RF_SingleComponent_t *) data;
1357 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1358 retcode = rf_add_hot_spare(raidPtr, &component);
1359 return(retcode);
1360
1361 case RAIDFRAME_REMOVE_HOT_SPARE:
1362 return(retcode);
1363
1364 case RAIDFRAME_DELETE_COMPONENT:
1365 componentPtr = (RF_SingleComponent_t *)data;
1366 memcpy( &component, componentPtr,
1367 sizeof(RF_SingleComponent_t));
1368 retcode = rf_delete_component(raidPtr, &component);
1369 return(retcode);
1370
1371 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1372 componentPtr = (RF_SingleComponent_t *)data;
1373 memcpy( &component, componentPtr,
1374 sizeof(RF_SingleComponent_t));
1375 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1376 return(retcode);
1377
1378 case RAIDFRAME_REBUILD_IN_PLACE:
1379
1380 if (raidPtr->Layout.map->faultsTolerated == 0) {
1381 /* Can't do this on a RAID 0!! */
1382 return(EINVAL);
1383 }
1384
1385 if (raidPtr->recon_in_progress == 1) {
1386 /* a reconstruct is already in progress! */
1387 return(EINVAL);
1388 }
1389
1390 componentPtr = (RF_SingleComponent_t *) data;
1391 memcpy( &component, componentPtr,
1392 sizeof(RF_SingleComponent_t));
1393 component.row = 0; /* we don't support any more */
1394 column = component.column;
1395
1396 if ((column < 0) || (column >= raidPtr->numCol)) {
1397 return(EINVAL);
1398 }
1399
1400 RF_LOCK_MUTEX(raidPtr->mutex);
1401 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1402 (raidPtr->numFailures > 0)) {
1403 /* XXX 0 above shouldn't be constant!!! */
1404 /* some component other than this has failed.
1405 Let's not make things worse than they already
1406 are... */
1407 printf("raid%d: Unable to reconstruct to disk at:\n",
1408 raidPtr->raidid);
1409 printf("raid%d: Col: %d Too many failures.\n",
1410 raidPtr->raidid, column);
1411 RF_UNLOCK_MUTEX(raidPtr->mutex);
1412 return (EINVAL);
1413 }
1414 if (raidPtr->Disks[column].status ==
1415 rf_ds_reconstructing) {
1416 printf("raid%d: Unable to reconstruct to disk at:\n",
1417 raidPtr->raidid);
1418 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1419
1420 RF_UNLOCK_MUTEX(raidPtr->mutex);
1421 return (EINVAL);
1422 }
1423 if (raidPtr->Disks[column].status == rf_ds_spared) {
1424 RF_UNLOCK_MUTEX(raidPtr->mutex);
1425 return (EINVAL);
1426 }
1427 RF_UNLOCK_MUTEX(raidPtr->mutex);
1428
1429 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1430 if (rrcopy == NULL)
1431 return(ENOMEM);
1432
1433 rrcopy->raidPtr = (void *) raidPtr;
1434 rrcopy->col = column;
1435
1436 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1437 rf_ReconstructInPlaceThread,
1438 rrcopy,"raid_reconip");
1439 return(retcode);
1440
1441 case RAIDFRAME_GET_INFO:
1442 if (!raidPtr->valid)
1443 return (ENODEV);
1444 ucfgp = (RF_DeviceConfig_t **) data;
1445 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1446 (RF_DeviceConfig_t *));
1447 if (d_cfg == NULL)
1448 return (ENOMEM);
1449 d_cfg->rows = 1; /* there is only 1 row now */
1450 d_cfg->cols = raidPtr->numCol;
1451 d_cfg->ndevs = raidPtr->numCol;
1452 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1453 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1454 return (ENOMEM);
1455 }
1456 d_cfg->nspares = raidPtr->numSpare;
1457 if (d_cfg->nspares >= RF_MAX_DISKS) {
1458 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1459 return (ENOMEM);
1460 }
1461 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1462 d = 0;
1463 for (j = 0; j < d_cfg->cols; j++) {
1464 d_cfg->devs[d] = raidPtr->Disks[j];
1465 d++;
1466 }
1467 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1468 d_cfg->spares[i] = raidPtr->Disks[j];
1469 }
1470 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1471 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1472
1473 return (retcode);
1474
1475 case RAIDFRAME_CHECK_PARITY:
1476 *(int *) data = raidPtr->parity_good;
1477 return (0);
1478
1479 case RAIDFRAME_PARITYMAP_STATUS:
1480 rf_paritymap_status(raidPtr->parity_map,
1481 (struct rf_pmstat *)data);
1482 return 0;
1483
1484 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1485 if (raidPtr->parity_map == NULL)
1486 return ENOENT; /* ??? */
1487 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1488 (struct rf_pmparams *)data, 1))
1489 return EINVAL;
1490 return 0;
1491
1492 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1493 *(int *) data = rf_paritymap_get_disable(raidPtr);
1494 return 0;
1495
1496 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1497 rf_paritymap_set_disable(raidPtr, *(int *)data);
1498 /* XXX should errors be passed up? */
1499 return 0;
1500
1501 case RAIDFRAME_RESET_ACCTOTALS:
1502 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1503 return (0);
1504
1505 case RAIDFRAME_GET_ACCTOTALS:
1506 totals = (RF_AccTotals_t *) data;
1507 *totals = raidPtr->acc_totals;
1508 return (0);
1509
1510 case RAIDFRAME_KEEP_ACCTOTALS:
1511 raidPtr->keep_acc_totals = *(int *)data;
1512 return (0);
1513
1514 case RAIDFRAME_GET_SIZE:
1515 *(int *) data = raidPtr->totalSectors;
1516 return (0);
1517
1518 /* fail a disk & optionally start reconstruction */
1519 case RAIDFRAME_FAIL_DISK:
1520
1521 if (raidPtr->Layout.map->faultsTolerated == 0) {
1522 /* Can't do this on a RAID 0!! */
1523 return(EINVAL);
1524 }
1525
1526 rr = (struct rf_recon_req *) data;
1527 rr->row = 0;
1528 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1529 return (EINVAL);
1530
1531
1532 RF_LOCK_MUTEX(raidPtr->mutex);
1533 if (raidPtr->status == rf_rs_reconstructing) {
1534 /* you can't fail a disk while we're reconstructing! */
1535 /* XXX wrong for RAID6 */
1536 RF_UNLOCK_MUTEX(raidPtr->mutex);
1537 return (EINVAL);
1538 }
1539 if ((raidPtr->Disks[rr->col].status ==
1540 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1541 /* some other component has failed. Let's not make
1542 things worse. XXX wrong for RAID6 */
1543 RF_UNLOCK_MUTEX(raidPtr->mutex);
1544 return (EINVAL);
1545 }
1546 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1547 /* Can't fail a spared disk! */
1548 RF_UNLOCK_MUTEX(raidPtr->mutex);
1549 return (EINVAL);
1550 }
1551 RF_UNLOCK_MUTEX(raidPtr->mutex);
1552
1553 /* make a copy of the recon request so that we don't rely on
1554 * the user's buffer */
1555 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1556 if (rrcopy == NULL)
1557 return(ENOMEM);
1558 memcpy(rrcopy, rr, sizeof(*rr));
1559 rrcopy->raidPtr = (void *) raidPtr;
1560
1561 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1562 rf_ReconThread,
1563 rrcopy,"raid_recon");
1564 return (0);
1565
1566 /* invoke a copyback operation after recon on whatever disk
1567 * needs it, if any */
1568 case RAIDFRAME_COPYBACK:
1569
1570 if (raidPtr->Layout.map->faultsTolerated == 0) {
1571 /* This makes no sense on a RAID 0!! */
1572 return(EINVAL);
1573 }
1574
1575 if (raidPtr->copyback_in_progress == 1) {
1576 /* Copyback is already in progress! */
1577 return(EINVAL);
1578 }
1579
1580 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1581 rf_CopybackThread,
1582 raidPtr,"raid_copyback");
1583 return (retcode);
1584
1585 /* return the percentage completion of reconstruction */
1586 case RAIDFRAME_CHECK_RECON_STATUS:
1587 if (raidPtr->Layout.map->faultsTolerated == 0) {
1588 /* This makes no sense on a RAID 0, so tell the
1589 user it's done. */
1590 *(int *) data = 100;
1591 return(0);
1592 }
1593 if (raidPtr->status != rf_rs_reconstructing)
1594 *(int *) data = 100;
1595 else {
1596 if (raidPtr->reconControl->numRUsTotal > 0) {
1597 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1598 } else {
1599 *(int *) data = 0;
1600 }
1601 }
1602 return (0);
1603 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1604 progressInfoPtr = (RF_ProgressInfo_t **) data;
1605 if (raidPtr->status != rf_rs_reconstructing) {
1606 progressInfo.remaining = 0;
1607 progressInfo.completed = 100;
1608 progressInfo.total = 100;
1609 } else {
1610 progressInfo.total =
1611 raidPtr->reconControl->numRUsTotal;
1612 progressInfo.completed =
1613 raidPtr->reconControl->numRUsComplete;
1614 progressInfo.remaining = progressInfo.total -
1615 progressInfo.completed;
1616 }
1617 retcode = copyout(&progressInfo, *progressInfoPtr,
1618 sizeof(RF_ProgressInfo_t));
1619 return (retcode);
1620
1621 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1622 if (raidPtr->Layout.map->faultsTolerated == 0) {
1623 /* This makes no sense on a RAID 0, so tell the
1624 user it's done. */
1625 *(int *) data = 100;
1626 return(0);
1627 }
1628 if (raidPtr->parity_rewrite_in_progress == 1) {
1629 *(int *) data = 100 *
1630 raidPtr->parity_rewrite_stripes_done /
1631 raidPtr->Layout.numStripe;
1632 } else {
1633 *(int *) data = 100;
1634 }
1635 return (0);
1636
1637 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1638 progressInfoPtr = (RF_ProgressInfo_t **) data;
1639 if (raidPtr->parity_rewrite_in_progress == 1) {
1640 progressInfo.total = raidPtr->Layout.numStripe;
1641 progressInfo.completed =
1642 raidPtr->parity_rewrite_stripes_done;
1643 progressInfo.remaining = progressInfo.total -
1644 progressInfo.completed;
1645 } else {
1646 progressInfo.remaining = 0;
1647 progressInfo.completed = 100;
1648 progressInfo.total = 100;
1649 }
1650 retcode = copyout(&progressInfo, *progressInfoPtr,
1651 sizeof(RF_ProgressInfo_t));
1652 return (retcode);
1653
1654 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1655 if (raidPtr->Layout.map->faultsTolerated == 0) {
1656 /* This makes no sense on a RAID 0 */
1657 *(int *) data = 100;
1658 return(0);
1659 }
1660 if (raidPtr->copyback_in_progress == 1) {
1661 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1662 raidPtr->Layout.numStripe;
1663 } else {
1664 *(int *) data = 100;
1665 }
1666 return (0);
1667
1668 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1669 progressInfoPtr = (RF_ProgressInfo_t **) data;
1670 if (raidPtr->copyback_in_progress == 1) {
1671 progressInfo.total = raidPtr->Layout.numStripe;
1672 progressInfo.completed =
1673 raidPtr->copyback_stripes_done;
1674 progressInfo.remaining = progressInfo.total -
1675 progressInfo.completed;
1676 } else {
1677 progressInfo.remaining = 0;
1678 progressInfo.completed = 100;
1679 progressInfo.total = 100;
1680 }
1681 retcode = copyout(&progressInfo, *progressInfoPtr,
1682 sizeof(RF_ProgressInfo_t));
1683 return (retcode);
1684
1685 /* the sparetable daemon calls this to wait for the kernel to
1686 * need a spare table. this ioctl does not return until a
1687 * spare table is needed. XXX -- calling mpsleep here in the
1688 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1689 * -- I should either compute the spare table in the kernel,
1690 * or have a different -- XXX XXX -- interface (a different
1691 * character device) for delivering the table -- XXX */
1692 #if 0
1693 case RAIDFRAME_SPARET_WAIT:
1694 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1695 while (!rf_sparet_wait_queue)
1696 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1697 waitreq = rf_sparet_wait_queue;
1698 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1699 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1700
1701 /* structure assignment */
1702 *((RF_SparetWait_t *) data) = *waitreq;
1703
1704 RF_Free(waitreq, sizeof(*waitreq));
1705 return (0);
1706
1707 /* wakes up a process waiting on SPARET_WAIT and puts an error
1708 * code in it that will cause the dameon to exit */
1709 case RAIDFRAME_ABORT_SPARET_WAIT:
1710 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1711 waitreq->fcol = -1;
1712 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1713 waitreq->next = rf_sparet_wait_queue;
1714 rf_sparet_wait_queue = waitreq;
1715 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1716 wakeup(&rf_sparet_wait_queue);
1717 return (0);
1718
1719 /* used by the spare table daemon to deliver a spare table
1720 * into the kernel */
1721 case RAIDFRAME_SEND_SPARET:
1722
1723 /* install the spare table */
1724 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1725
1726 /* respond to the requestor. the return status of the spare
1727 * table installation is passed in the "fcol" field */
1728 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1729 waitreq->fcol = retcode;
1730 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1731 waitreq->next = rf_sparet_resp_queue;
1732 rf_sparet_resp_queue = waitreq;
1733 wakeup(&rf_sparet_resp_queue);
1734 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1735
1736 return (retcode);
1737 #endif
1738
1739 default:
1740 break; /* fall through to the os-specific code below */
1741
1742 }
1743
1744 if (!raidPtr->valid)
1745 return (EINVAL);
1746
1747 /*
1748 * Add support for "regular" device ioctls here.
1749 */
1750
1751 switch (cmd) {
1752 case DIOCGDINFO:
1753 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1754 break;
1755 #ifdef __HAVE_OLD_DISKLABEL
1756 case ODIOCGDINFO:
1757 newlabel = *(rs->sc_dkdev.dk_label);
1758 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1759 return ENOTTY;
1760 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1761 break;
1762 #endif
1763
1764 case DIOCGPART:
1765 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1766 ((struct partinfo *) data)->part =
1767 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1768 break;
1769
1770 case DIOCWDINFO:
1771 case DIOCSDINFO:
1772 #ifdef __HAVE_OLD_DISKLABEL
1773 case ODIOCWDINFO:
1774 case ODIOCSDINFO:
1775 #endif
1776 {
1777 struct disklabel *lp;
1778 #ifdef __HAVE_OLD_DISKLABEL
1779 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1780 memset(&newlabel, 0, sizeof newlabel);
1781 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1782 lp = &newlabel;
1783 } else
1784 #endif
1785 lp = (struct disklabel *)data;
1786
1787 if ((error = raidlock(rs)) != 0)
1788 return (error);
1789
1790 rs->sc_flags |= RAIDF_LABELLING;
1791
1792 error = setdisklabel(rs->sc_dkdev.dk_label,
1793 lp, 0, rs->sc_dkdev.dk_cpulabel);
1794 if (error == 0) {
1795 if (cmd == DIOCWDINFO
1796 #ifdef __HAVE_OLD_DISKLABEL
1797 || cmd == ODIOCWDINFO
1798 #endif
1799 )
1800 error = writedisklabel(RAIDLABELDEV(dev),
1801 raidstrategy, rs->sc_dkdev.dk_label,
1802 rs->sc_dkdev.dk_cpulabel);
1803 }
1804 rs->sc_flags &= ~RAIDF_LABELLING;
1805
1806 raidunlock(rs);
1807
1808 if (error)
1809 return (error);
1810 break;
1811 }
1812
1813 case DIOCWLABEL:
1814 if (*(int *) data != 0)
1815 rs->sc_flags |= RAIDF_WLABEL;
1816 else
1817 rs->sc_flags &= ~RAIDF_WLABEL;
1818 break;
1819
1820 case DIOCGDEFLABEL:
1821 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1822 break;
1823
1824 #ifdef __HAVE_OLD_DISKLABEL
1825 case ODIOCGDEFLABEL:
1826 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1827 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1828 return ENOTTY;
1829 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1830 break;
1831 #endif
1832
1833 case DIOCAWEDGE:
1834 case DIOCDWEDGE:
1835 dkw = (void *)data;
1836
1837 /* If the ioctl happens here, the parent is us. */
1838 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1839 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1840
1841 case DIOCLWEDGES:
1842 return dkwedge_list(&rs->sc_dkdev,
1843 (struct dkwedge_list *)data, l);
1844 case DIOCCACHESYNC:
1845 return rf_sync_component_caches(raidPtr);
1846
1847 case DIOCGSTRATEGY:
1848 {
1849 struct disk_strategy *dks = (void *)data;
1850
1851 s = splbio();
1852 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1853 sizeof(dks->dks_name));
1854 splx(s);
1855 dks->dks_paramlen = 0;
1856
1857 return 0;
1858 }
1859
1860 case DIOCSSTRATEGY:
1861 {
1862 struct disk_strategy *dks = (void *)data;
1863 struct bufq_state *new;
1864 struct bufq_state *old;
1865
1866 if (dks->dks_param != NULL) {
1867 return EINVAL;
1868 }
1869 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1870 error = bufq_alloc(&new, dks->dks_name,
1871 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1872 if (error) {
1873 return error;
1874 }
1875 s = splbio();
1876 old = rs->buf_queue;
1877 bufq_move(new, old);
1878 rs->buf_queue = new;
1879 splx(s);
1880 bufq_free(old);
1881
1882 return 0;
1883 }
1884
1885 default:
1886 retcode = ENOTTY;
1887 }
1888 return (retcode);
1889
1890 }
1891
1892
1893 /* raidinit -- complete the rest of the initialization for the
1894 RAIDframe device. */
1895
1896
1897 static void
1898 raidinit(RF_Raid_t *raidPtr)
1899 {
1900 struct cfdata *cf;
1901 struct raid_softc *rs;
1902 int unit;
1903
1904 unit = raidPtr->raidid;
1905
1906 rs = &raid_softc[unit];
1907
1908 /* XXX should check return code first... */
1909 rs->sc_flags |= RAIDF_INITED;
1910
1911 /* XXX doesn't check bounds. */
1912 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1913
1914 /* attach the pseudo device */
1915 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1916 cf->cf_name = raid_cd.cd_name;
1917 cf->cf_atname = raid_cd.cd_name;
1918 cf->cf_unit = unit;
1919 cf->cf_fstate = FSTATE_STAR;
1920
1921 rs->sc_dev = config_attach_pseudo(cf);
1922
1923 if (rs->sc_dev==NULL) {
1924 printf("raid%d: config_attach_pseudo failed\n",
1925 raidPtr->raidid);
1926 }
1927
1928 /* disk_attach actually creates space for the CPU disklabel, among
1929 * other things, so it's critical to call this *BEFORE* we try putzing
1930 * with disklabels. */
1931
1932 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1933 disk_attach(&rs->sc_dkdev);
1934 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1935
1936 /* XXX There may be a weird interaction here between this, and
1937 * protectedSectors, as used in RAIDframe. */
1938
1939 rs->sc_size = raidPtr->totalSectors;
1940
1941 dkwedge_discover(&rs->sc_dkdev);
1942
1943 rf_set_properties(rs, raidPtr);
1944
1945 }
1946 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1947 /* wake up the daemon & tell it to get us a spare table
1948 * XXX
1949 * the entries in the queues should be tagged with the raidPtr
1950 * so that in the extremely rare case that two recons happen at once,
1951 * we know for which device were requesting a spare table
1952 * XXX
1953 *
1954 * XXX This code is not currently used. GO
1955 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Post our request and wake the user-space sparetable daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Block until the daemon delivers a response (via the
	 * RAIDFRAME_SEND_SPARET ioctl, which queues it on
	 * rf_sparet_resp_queue).
	 *
	 * NOTE(review): the original comment here claimed "mpsleep
	 * unlocks the mutex", but this code calls tsleep(), which does
	 * not release RF_LOCK_MUTEX-style locks -- confirm the lock
	 * discipline if this (currently unused) code is re-enabled.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Dequeue the response; 'req' now points at the daemon's reply. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The spare-table installation status is passed in "fcol". */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1980 #endif
1981
1982 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1983 * bp & passes it down.
1984 * any calls originating in the kernel must use non-blocking I/O
1985 * do some extra sanity checking to return "appropriate" error values for
1986 * certain conditions (to make some standard utilities work)
1987 *
1988 * Formerly known as: rf_DoAccessKernel
1989 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the lock around the label update, then retake it. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/*
	 * Check to see if we're at the limit...
	 *
	 * Locking invariant: raidPtr->mutex is held at the top of every
	 * iteration (and when the loop exits); it is released while a
	 * request is built and dispatched, and each 'continue' path
	 * retakes it first.
	 */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do; mutex is NOT retaken here */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sector units. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/*
		 * Reject requests that run off the end of the array; the
		 * extra (sum < x) comparisons catch arithmetic wrap-around.
		 */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Transfers must be a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one of the outstanding-I/O slots. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/*
			 * The access never launched; fail the buffer.
			 * NOTE(review): the opening consumed above does
			 * not appear to be returned on this path --
			 * confirm whether the completion path restores
			 * it, else openings can leak on errors.
			 */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
2110
2111
2112
2113
2114 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2115
2116 int
2117 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2118 {
2119 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2120 struct buf *bp;
2121
2122 req->queue = queue;
2123
2124 #if DIAGNOSTIC
2125 if (queue->raidPtr->raidid >= numraid) {
2126 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
2127 numraid);
2128 panic("Invalid Unit number in rf_DispatchKernelIO");
2129 }
2130 #endif
2131
2132 bp = req->bp;
2133
2134 switch (req->type) {
2135 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
2136 /* XXX need to do something extra here.. */
2137 /* I'm leaving this in, as I've never actually seen it used,
2138 * and I'd like folks to report it... GO */
2139 printf(("WAKEUP CALLED\n"));
2140 queue->numOutstanding++;
2141
2142 bp->b_flags = 0;
2143 bp->b_private = req;
2144
2145 KernelWakeupFunc(bp);
2146 break;
2147
2148 case RF_IO_TYPE_READ:
2149 case RF_IO_TYPE_WRITE:
2150 #if RF_ACC_TRACE > 0
2151 if (req->tracerec) {
2152 RF_ETIMER_START(req->tracerec->timer);
2153 }
2154 #endif
2155 InitBP(bp, queue->rf_cinfo->ci_vp,
2156 op, queue->rf_cinfo->ci_dev,
2157 req->sectorOffset, req->numSector,
2158 req->buf, KernelWakeupFunc, (void *) req,
2159 queue->raidPtr->logBytesPerSector, req->b_proc);
2160
2161 if (rf_debugKernelAccess) {
2162 db1_printf(("dispatch: bp->b_blkno = %ld\n",
2163 (long) bp->b_blkno));
2164 }
2165 queue->numOutstanding++;
2166 queue->last_deq_sector = req->sectorOffset;
2167 /* acc wouldn't have been let in if there were any pending
2168 * reqs at any other priority */
2169 queue->curPriority = req->priority;
2170
2171 db1_printf(("Going for %c to unit %d col %d\n",
2172 req->type, queue->raidPtr->raidid,
2173 queue->col));
2174 db1_printf(("sector %d count %d (%d bytes) %d\n",
2175 (int) req->sectorOffset, (int) req->numSector,
2176 (int) (req->numSector <<
2177 queue->raidPtr->logBytesPerSector),
2178 (int) queue->raidPtr->logBytesPerSector));
2179
2180 /*
2181 * XXX: drop lock here since this can block at
2182 * least with backing SCSI devices. Retake it
2183 * to minimize fuss with calling interfaces.
2184 */
2185
2186 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2187 bdev_strategy(bp);
2188 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2189 break;
2190
2191 default:
2192 panic("bad req->type in rf_DispatchKernelIO");
2193 }
2194 db1_printf(("Exiting from DispatchKernelIO\n"));
2195
2196 return (0);
2197 }
2198 /* this is the callback function associated with a I/O invoked from
2199 kernel code.
2200 */
static void
KernelWakeupFunc(struct buf *bp)
{
	/*
	 * I/O completion callback (set as b_iodone by InitBP).  Runs at
	 * biodone time: accounts trace timing, fails the component on
	 * error (once, and only if the set can survive it), then hands
	 * the request to the raidio thread via the iodone queue.
	 */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The originating request was stashed in b_private by
	 * rf_DispatchKernelIO()/InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2270
2271
2272
2273 /*
2274 * initialize a buf structure for doing an I/O in the kernel.
2275 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/*
	 * Initialize a buf structure for doing an I/O in the kernel.
	 * cbFunc/cbArg become the b_iodone callback and its b_private
	 * argument; startSect/numSect are in component sectors of
	 * (1 << logBytesPerSector) bytes.
	 */
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* sectors -> bytes -> DEV_BSIZE blocks */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2300
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	/*
	 * Fabricate a default disklabel for the RAID device from its
	 * geometry: one RAW_PART partition covering all sectors.
	 */
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label... */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* invented geometry: 4 "tracks" per column */
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
		(lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/* NOTE(review): checksum is computed over rs->sc_dkdev.dk_label,
	 * not over lp -- correct only when the caller passes dk_label as
	 * lp (as raidgetdisklabel() does).  Verify other callers. */
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
2333 /*
2334 * Read the disklabel from the raid device. If one is not present, fake one
2335 * up.
2336 */
static void
raidgetdisklabel(dev_t dev)
{
	/*
	 * Read the disklabel from the raid device.  If one is not
	 * present, fake one up.  A found label is sanity-checked
	 * against the current size of the raid.
	 */
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default in case the read fails */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2389 /*
2390 * Take care of things one might want to take care of in the event
2391 * that a disklabel isn't present.
2392 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	/*
	 * Take care of things one might want to take care of in the
	 * event that a disklabel isn't present: adjust the in-core
	 * default label and recompute its checksum (must be last).
	 */
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* checksum over the final label contents */
	lp->d_checksum = dkcksum(lp);
}
2410 /*
2411 * Wait interruptibly for an exclusive lock.
2412 *
2413 * XXX
2414 * Several drivers do this; it should be abstracted and made MP-safe.
2415 * (Hmm... where have we seen this warning before :-> GO )
2416 */
static int
raidlock(struct raid_softc *rs)
{
	/*
	 * Wait interruptibly for an exclusive lock on the softc.
	 * Returns 0 on success, or the tsleep() error (e.g. EINTR)
	 * if the wait was interrupted.
	 */
	int error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		/* record that someone is waiting so raidunlock() wakes us */
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
2431 /*
2432 * Unlock and wake up any waiters.
2433 */
static void
raidunlock(struct raid_softc *rs)
{
	/* Release the exclusive lock and wake up any waiters. */

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}
2444
2445
2446 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2447 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2448 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
2449
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset of the component label/info area from the start
	 * of a component; currently a fixed constant. */
	return RF_COMPONENT_INFO_OFFSET;
}
2456
2457 static daddr_t
2458 rf_component_info_size(unsigned secsize)
2459 {
2460 daddr_t info_size;
2461
2462 KASSERT(secsize);
2463 if (secsize > RF_COMPONENT_INFO_SIZE)
2464 info_size = secsize;
2465 else
2466 info_size = RF_COMPONENT_INFO_SIZE;
2467
2468 return info_size;
2469 }
2470
2471 static daddr_t
2472 rf_parity_map_offset(RF_Raid_t *raidPtr)
2473 {
2474 daddr_t map_offset;
2475
2476 KASSERT(raidPtr->bytesPerSector);
2477 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2478 map_offset = raidPtr->bytesPerSector;
2479 else
2480 map_offset = RF_COMPONENT_INFO_SIZE;
2481 map_offset += rf_component_info_offset();
2482
2483 return map_offset;
2484 }
2485
2486 static daddr_t
2487 rf_parity_map_size(RF_Raid_t *raidPtr)
2488 {
2489 daddr_t map_size;
2490
2491 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2492 map_size = raidPtr->bytesPerSector;
2493 else
2494 map_size = RF_PARITY_MAP_SIZE;
2495
2496 return map_size;
2497 }
2498
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Mark component `col' clean in its in-core label and flush
	 * the label to disk.  Always returns 0. */
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2509
2510
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Mark component `col' dirty in its in-core label and flush
	 * the label to disk.  Always returns 0. */
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}
2521
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Read component `col's on-disk label into its in-core copy.
	 * Returns the raidread_component_label() error code. */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2531
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return a pointer to component `col's in-core label. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2537
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Write component `col's in-core label to disk, stamping it
	 * with the set's current mod_counter first. */
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	/* keep the parity-map generation in sync with the label */
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2552
2553
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read one component label from its fixed on-disk location. */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2563
2564 /* ARGSUSED */
2565 static int
2566 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2567 size_t msize, daddr_t offset, daddr_t dsize)
2568 {
2569 struct buf *bp;
2570 const struct bdevsw *bdev;
2571 int error;
2572
2573 /* XXX should probably ensure that we don't try to do this if
2574 someone has changed rf_protected_sectors. */
2575
2576 if (b_vp == NULL) {
2577 /* For whatever reason, this component is not valid.
2578 Don't try to read a component label from it. */
2579 return(EINVAL);
2580 }
2581
2582 /* get a block of the appropriate size... */
2583 bp = geteblk((int)dsize);
2584 bp->b_dev = dev;
2585
2586 /* get our ducks in a row for the read */
2587 bp->b_blkno = offset / DEV_BSIZE;
2588 bp->b_bcount = dsize;
2589 bp->b_flags |= B_READ;
2590 bp->b_resid = dsize;
2591
2592 bdev = bdevsw_lookup(bp->b_dev);
2593 if (bdev == NULL)
2594 return (ENXIO);
2595 (*bdev->d_strategy)(bp);
2596
2597 error = biowait(bp);
2598
2599 if (!error) {
2600 memcpy(data, bp->b_data, msize);
2601 }
2602
2603 brelse(bp, 0);
2604 return(error);
2605 }
2606
2607
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Synchronously write one component label to its fixed
	 * on-disk location. */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2617
2618 /* ARGSUSED */
2619 static int
2620 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2621 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2622 {
2623 struct buf *bp;
2624 const struct bdevsw *bdev;
2625 int error;
2626
2627 /* get a block of the appropriate size... */
2628 bp = geteblk((int)dsize);
2629 bp->b_dev = dev;
2630
2631 /* get our ducks in a row for the write */
2632 bp->b_blkno = offset / DEV_BSIZE;
2633 bp->b_bcount = dsize;
2634 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2635 bp->b_resid = dsize;
2636
2637 memset(bp->b_data, 0, dsize);
2638 memcpy(bp->b_data, data, msize);
2639
2640 bdev = bdevsw_lookup(bp->b_dev);
2641 if (bdev == NULL)
2642 return (ENXIO);
2643 (*bdev->d_strategy)(bp);
2644 if (asyncp)
2645 return 0;
2646 error = biowait(bp);
2647 brelse(bp, 0);
2648 if (error) {
2649 #if 1
2650 printf("Failed to write RAID component info!\n");
2651 #endif
2652 }
2653
2654 return(error);
2655 }
2656
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	/* Write the in-core parity map to every live component. */
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2674
2675 void
2676 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2677 {
2678 struct rf_paritymap_ondisk tmp;
2679 int c,first;
2680
2681 first=1;
2682 for (c = 0; c < raidPtr->numCol; c++) {
2683 /* Skip dead disks. */
2684 if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2685 continue;
2686 raidread_component_area(raidPtr->Disks[c].dev,
2687 raidPtr->raid_cinfo[c].ci_vp, &tmp,
2688 RF_PARITYMAP_NBYTE,
2689 rf_parity_map_offset(raidPtr),
2690 rf_parity_map_size(raidPtr));
2691 if (first) {
2692 memcpy(map, &tmp, sizeof(*map));
2693 first = 0;
2694 } else {
2695 rf_paritymap_merge(map, &tmp);
2696 }
2697 }
2698 }
2699
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/*
	 * Bump the set's mod_counter and mark every live component
	 * (and every in-use spare) dirty on disk.  Spares get a
	 * freshly initialized label noting which column they stand
	 * in for.
	 */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2759
2760
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/*
	 * Bump mod_counter and rewrite labels on every optimal
	 * component and every in-use spare.  When `final' is
	 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the
	 * components are additionally marked clean.
	 */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2835
2836 void
2837 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2838 {
2839
2840 if (vp != NULL) {
2841 if (auto_configured == 1) {
2842 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2843 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2844 vput(vp);
2845
2846 } else {
2847 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2848 }
2849 }
2850 }
2851
2852
2853 void
2854 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2855 {
2856 int r,c;
2857 struct vnode *vp;
2858 int acd;
2859
2860
2861 /* We take this opportunity to close the vnodes like we should.. */
2862
2863 for (c = 0; c < raidPtr->numCol; c++) {
2864 vp = raidPtr->raid_cinfo[c].ci_vp;
2865 acd = raidPtr->Disks[c].auto_configured;
2866 rf_close_component(raidPtr, vp, acd);
2867 raidPtr->raid_cinfo[c].ci_vp = NULL;
2868 raidPtr->Disks[c].auto_configured = 0;
2869 }
2870
2871 for (r = 0; r < raidPtr->numSpare; r++) {
2872 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2873 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2874 rf_close_component(raidPtr, vp, acd);
2875 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2876 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2877 }
2878 }
2879
2880
void
rf_ReconThread(struct rf_recon_req *req)
{
	/*
	 * Kernel-thread entry point: fail the requested component
	 * (optionally starting reconstruction, per RF_FDFLAGS_RECON)
	 * and exit.  Takes ownership of, and frees, `req'.
	 */
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2902
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	/*
	 * Kernel-thread entry point: rewrite all parity.  On success
	 * the set is marked parity-clean; anyone waiting in shutdown
	 * is woken before the thread exits.
	 */
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2932
2933
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	/*
	 * Kernel-thread entry point: copy reconstructed data back
	 * from the spare to the replaced component, then exit.
	 */
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2948
2949
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	/*
	 * Kernel-thread entry point: reconstruct the requested column
	 * in place and exit.  Takes ownership of, and frees, `req'.
	 */
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2967
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	/*
	 * Try to read a component label from (dev, vp).  If the label
	 * is reasonable, prepend a new RF_AutoConfig_t for it to
	 * ac_list (taking ownership of vp and the label); otherwise
	 * close/release vp.  Returns the (possibly updated) list, or
	 * NULL after freeing the whole list on allocation failure.
	 */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* prepend to the list */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: we keep neither the label nor the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3025
3026 RF_AutoConfig_t *
3027 rf_find_raid_components()
3028 {
3029 struct vnode *vp;
3030 struct disklabel label;
3031 struct device *dv;
3032 dev_t dev;
3033 int bmajor, bminor, wedge;
3034 int error;
3035 int i;
3036 RF_AutoConfig_t *ac_list;
3037 uint64_t numsecs;
3038 unsigned secsize;
3039
3040 RF_ASSERT(raidPtr->bytesPerSector < rf_component_info_offset());
3041
3042 /* initialize the AutoConfig list */
3043 ac_list = NULL;
3044
3045 /* we begin by trolling through *all* the devices on the system */
3046
3047 for (dv = alldevs.tqh_first; dv != NULL;
3048 dv = dv->dv_list.tqe_next) {
3049
3050 /* we are only interested in disks... */
3051 if (device_class(dv) != DV_DISK)
3052 continue;
3053
3054 /* we don't care about floppies... */
3055 if (device_is_a(dv, "fd")) {
3056 continue;
3057 }
3058
3059 /* we don't care about CD's... */
3060 if (device_is_a(dv, "cd")) {
3061 continue;
3062 }
3063
3064 /* we don't care about md's... */
3065 if (device_is_a(dv, "md")) {
3066 continue;
3067 }
3068
3069 /* hdfd is the Atari/Hades floppy driver */
3070 if (device_is_a(dv, "hdfd")) {
3071 continue;
3072 }
3073
3074 /* fdisa is the Atari/Milan floppy driver */
3075 if (device_is_a(dv, "fdisa")) {
3076 continue;
3077 }
3078
3079 /* need to find the device_name_to_block_device_major stuff */
3080 bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3081
3082 /* get a vnode for the raw partition of this disk */
3083
3084 wedge = device_is_a(dv, "dk");
3085 bminor = minor(device_unit(dv));
3086 dev = wedge ? makedev(bmajor, bminor) :
3087 MAKEDISKDEV(bmajor, bminor, RAW_PART);
3088 if (bdevvp(dev, &vp))
3089 panic("RAID can't alloc vnode");
3090
3091 error = VOP_OPEN(vp, FREAD, NOCRED);
3092
3093 if (error) {
3094 /* "Who cares." Continue looking
3095 for something that exists*/
3096 vput(vp);
3097 continue;
3098 }
3099
3100 error = getdisksize(vp, &numsecs, &secsize);
3101 if (error) {
3102 vput(vp);
3103 continue;
3104 }
3105 if (wedge) {
3106 struct dkwedge_info dkw;
3107 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3108 NOCRED);
3109 if (error) {
3110 printf("RAIDframe: can't get wedge info for "
3111 "dev %s (%d)\n", device_xname(dv), error);
3112 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3113 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3114 vput(vp);
3115 continue;
3116 }
3117
3118 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3119 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3120 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3121 vput(vp);
3122 continue;
3123 }
3124
3125 ac_list = rf_get_component(ac_list, dev, vp,
3126 device_xname(dv), dkw.dkw_size, numsecs, secsize);
3127 continue;
3128 }
3129
3130 /* Ok, the disk exists. Go get the disklabel. */
3131 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3132 if (error) {
3133 /*
3134 * XXX can't happen - open() would
3135 * have errored out (or faked up one)
3136 */
3137 if (error != ENOTTY)
3138 printf("RAIDframe: can't get label for dev "
3139 "%s (%d)\n", device_xname(dv), error);
3140 }
3141
3142 /* don't need this any more. We'll allocate it again
3143 a little later if we really do... */
3144 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3145 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3146 vput(vp);
3147
3148 if (error)
3149 continue;
3150
3151 for (i = 0; i < label.d_npartitions; i++) {
3152 char cname[sizeof(ac_list->devname)];
3153
3154 /* We only support partitions marked as RAID */
3155 if (label.d_partitions[i].p_fstype != FS_RAID)
3156 continue;
3157
3158 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3159 if (bdevvp(dev, &vp))
3160 panic("RAID can't alloc vnode");
3161
3162 error = VOP_OPEN(vp, FREAD, NOCRED);
3163 if (error) {
3164 /* Whatever... */
3165 vput(vp);
3166 continue;
3167 }
3168 snprintf(cname, sizeof(cname), "%s%c",
3169 device_xname(dv), 'a' + i);
3170 ac_list = rf_get_component(ac_list, dev, vp, cname,
3171 label.d_partitions[i].p_size, numsecs, secsize);
3172 }
3173 }
3174 return ac_list;
3175 }
3176
3177
3178 static int
3179 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3180 {
3181
3182 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3183 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3184 ((clabel->clean == RF_RAID_CLEAN) ||
3185 (clabel->clean == RF_RAID_DIRTY)) &&
3186 clabel->row >=0 &&
3187 clabel->column >= 0 &&
3188 clabel->num_rows > 0 &&
3189 clabel->num_columns > 0 &&
3190 clabel->row < clabel->num_rows &&
3191 clabel->column < clabel->num_columns &&
3192 clabel->blockSize > 0 &&
3193 /*
3194 * numBlocksHi may contain garbage, but it is ok since
3195 * the type is unsigned. If it is really garbage,
3196 * rf_fix_old_label_size() will fix it.
3197 */
3198 rf_component_label_numblocks(clabel) > 0) {
3199 /*
3200 * label looks reasonable enough...
3201 * let's make sure it has no old garbage.
3202 */
3203 rf_fix_old_label_size(clabel, numsecs);
3204 return(1);
3205 }
3206 return(0);
3207 }
3208
3209
3210 /*
3211 * For reasons yet unknown, some old component labels have garbage in
3212 * the newer numBlocksHi region, and this causes lossage. Since those
3213 * disks will also have numsecs set to less than 32 bits of sectors,
3214 * we can determine when this corruption has occured, and fix it.
3215 *
3216 * The exact same problem, with the same unknown reason, happens to
3217 * the partitionSizeHi member as well.
3218 */
3219 static void
3220 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3221 {
3222
3223 if (numsecs < ((uint64_t)1 << 32)) {
3224 if (clabel->numBlocksHi) {
3225 printf("WARNING: total sectors < 32 bits, yet "
3226 "numBlocksHi set\n"
3227 "WARNING: resetting numBlocksHi to zero.\n");
3228 clabel->numBlocksHi = 0;
3229 }
3230
3231 if (clabel->partitionSizeHi) {
3232 printf("WARNING: total sectors < 32 bits, yet "
3233 "partitionSizeHi set\n"
3234 "WARNING: resetting partitionSizeHi to zero.\n");
3235 clabel->partitionSizeHi = 0;
3236 }
3237 }
3238 }
3239
3240
3241 #ifdef DEBUG
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Debug helper: dump the interesting fields of a component
	 * label to the console. */
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3270 #endif
3271
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	/*
	 * Partition the AutoConfig list into config sets: each
	 * component is moved (its `next' pointer is re-spliced) into
	 * the first existing set it fits (per rf_does_it_fit()), or
	 * into a newly created set.  Returns the list of sets.
	 */
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
3337
3338 static int
3339 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3340 {
3341 RF_ComponentLabel_t *clabel1, *clabel2;
3342
3343 /* If this one matches the *first* one in the set, that's good
3344 enough, since the other members of the set would have been
3345 through here too... */
3346 /* note that we are not checking partitionSize here..
3347
3348 Note that we are also not checking the mod_counters here.
3349 If everything else matches execpt the mod_counter, that's
3350 good enough for this test. We will deal with the mod_counters
3351 a little later in the autoconfiguration process.
3352
3353 (clabel1->mod_counter == clabel2->mod_counter) &&
3354
3355 The reason we don't check for this is that failed disks
3356 will have lower modification counts. If those disks are
3357 not added to the set they used to belong to, then they will
3358 form their own set, which may result in 2 different sets,
3359 for example, competing to be configured at raid0, and
3360 perhaps competing to be the root filesystem set. If the
3361 wrong ones get configured, or both attempt to become /,
3362 weird behaviour and or serious lossage will occur. Thus we
3363 need to bring them into the fold here, and kick them out at
3364 a later point.
3365
3366 */
3367
3368 clabel1 = cset->ac->clabel;
3369 clabel2 = ac->clabel;
3370 if ((clabel1->version == clabel2->version) &&
3371 (clabel1->serial_number == clabel2->serial_number) &&
3372 (clabel1->num_rows == clabel2->num_rows) &&
3373 (clabel1->num_columns == clabel2->num_columns) &&
3374 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3375 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3376 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3377 (clabel1->parityConfig == clabel2->parityConfig) &&
3378 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3379 (clabel1->blockSize == clabel2->blockSize) &&
3380 rf_component_label_numblocks(clabel1) ==
3381 rf_component_label_numblocks(clabel2) &&
3382 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3383 (clabel1->root_partition == clabel2->root_partition) &&
3384 (clabel1->last_unit == clabel2->last_unit) &&
3385 (clabel1->config_order == clabel2->config_order)) {
3386 /* if it get's here, it almost *has* to be a match */
3387 } else {
3388 /* it's not consistent with somebody in the set..
3389 punt */
3390 return(0);
3391 }
3392 /* all was fine.. it must fit... */
3393 return(1);
3394 }
3395
/*
 * Decide whether config set 'cset' has enough live components to be
 * configured.  Returns 1 if the set is viable, 0 if too many members
 * are missing or stale.
 *
 * A component "counts" for a column only if its mod_counter equals the
 * highest mod_counter seen in the set (stale components were allowed
 * into the set by rf_does_it_fit() and are effectively filtered out
 * here).  RAID 1 gets special pair-wise treatment; other levels use a
 * simple missing-component count checked against per-level limits at
 * the bottom.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum over all members.  Up-to-date components share the
	   highest value; failed ones lag behind. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			/* first member seeds the maximum */
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each expected column, look for a current (mod_counter
	   matching) component claiming that column. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of the
						   mirror pair gone */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd component (second half
			   of a mirror pair) and we didn't bail..
			   reset the even_pair_failed flag, and go on
			   to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Per-level tolerance: RAID 0 survives no losses; RAID 4/5
	   survive at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3498
3499 void
3500 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3501 RF_Raid_t *raidPtr)
3502 {
3503 RF_ComponentLabel_t *clabel;
3504 int i;
3505
3506 clabel = ac->clabel;
3507
3508 /* 1. Fill in the common stuff */
3509 config->numRow = clabel->num_rows = 1;
3510 config->numCol = clabel->num_columns;
3511 config->numSpare = 0; /* XXX should this be set here? */
3512 config->sectPerSU = clabel->sectPerSU;
3513 config->SUsPerPU = clabel->SUsPerPU;
3514 config->SUsPerRU = clabel->SUsPerRU;
3515 config->parityConfig = clabel->parityConfig;
3516 /* XXX... */
3517 strcpy(config->diskQueueType,"fifo");
3518 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3519 config->layoutSpecificSize = 0; /* XXX ?? */
3520
3521 while(ac!=NULL) {
3522 /* row/col values will be in range due to the checks
3523 in reasonable_label() */
3524 strcpy(config->devnames[0][ac->clabel->column],
3525 ac->devname);
3526 ac = ac->next;
3527 }
3528
3529 for(i=0;i<RF_MAXDBGV;i++) {
3530 config->debugVars[i][0] = 0;
3531 }
3532 }
3533
3534 int
3535 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3536 {
3537 RF_ComponentLabel_t *clabel;
3538 int column;
3539 int sparecol;
3540
3541 raidPtr->autoconfigure = new_value;
3542
3543 for(column=0; column<raidPtr->numCol; column++) {
3544 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3545 clabel = raidget_component_label(raidPtr, column);
3546 clabel->autoconfigure = new_value;
3547 raidflush_component_label(raidPtr, column);
3548 }
3549 }
3550 for(column = 0; column < raidPtr->numSpare ; column++) {
3551 sparecol = raidPtr->numCol + column;
3552 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3553 clabel = raidget_component_label(raidPtr, sparecol);
3554 clabel->autoconfigure = new_value;
3555 raidflush_component_label(raidPtr, sparecol);
3556 }
3557 }
3558 return(new_value);
3559 }
3560
3561 int
3562 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3563 {
3564 RF_ComponentLabel_t *clabel;
3565 int column;
3566 int sparecol;
3567
3568 raidPtr->root_partition = new_value;
3569 for(column=0; column<raidPtr->numCol; column++) {
3570 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3571 clabel = raidget_component_label(raidPtr, column);
3572 clabel->root_partition = new_value;
3573 raidflush_component_label(raidPtr, column);
3574 }
3575 }
3576 for(column = 0; column < raidPtr->numSpare ; column++) {
3577 sparecol = raidPtr->numCol + column;
3578 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3579 clabel = raidget_component_label(raidPtr, sparecol);
3580 clabel->root_partition = new_value;
3581 raidflush_component_label(raidPtr, sparecol);
3582 }
3583 }
3584 return(new_value);
3585 }
3586
3587 void
3588 rf_release_all_vps(RF_ConfigSet_t *cset)
3589 {
3590 RF_AutoConfig_t *ac;
3591
3592 ac = cset->ac;
3593 while(ac!=NULL) {
3594 /* Close the vp, and give it back */
3595 if (ac->vp) {
3596 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3597 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3598 vput(ac->vp);
3599 ac->vp = NULL;
3600 }
3601 ac = ac->next;
3602 }
3603 }
3604
3605
3606 void
3607 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3608 {
3609 RF_AutoConfig_t *ac;
3610 RF_AutoConfig_t *next_ac;
3611
3612 ac = cset->ac;
3613 while(ac!=NULL) {
3614 next_ac = ac->next;
3615 /* nuke the label */
3616 free(ac->clabel, M_RAIDFRAME);
3617 /* cleanup the config structure */
3618 free(ac, M_RAIDFRAME);
3619 /* "next.." */
3620 ac = next_ac;
3621 }
3622 /* and, finally, nuke the config set */
3623 free(cset, M_RAIDFRAME);
3624 }
3625
3626
/*
 * Populate a component label from the current in-core state of the
 * array.  Every field is taken from raidPtr (serial number, geometry,
 * layout parameters, tuning and autoconfig settings); the label is
 * marked dirty and optimal.  The caller is responsible for writing
 * the label out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout geometry */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the label's lo/hi halves */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* parity-map bookkeeping also lives in the label */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3659
/*
 * Autoconfigure one config set: pick a raid unit number, build an
 * RF_Config_t from the set's labels, and configure the array.
 *
 * On success returns 0 and stores the chosen unit in *unit; on
 * failure returns non-zero (and *unit is left at -1 for the early
 * failure paths, or the attempted unit for an rf_Configure failure).
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	/* if the search above fell off the bottom, raidID is -1 here */
	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {
		/* the array came up: attach the disk, mark components
		   dirty until parity is known good */
		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3760
3761 void
3762 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3763 {
3764 struct buf *bp;
3765
3766 bp = (struct buf *)desc->bp;
3767 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3768 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3769 }
3770
/*
 * Convenience wrapper around pool_init() for RAIDframe's object pools:
 * create a pool of 'size'-byte objects named 'w_chan' at IPL_BIO,
 * pre-allocate 'xmin' items, and set the low/high watermarks to
 * xmin/xmax respectively.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* prime before setting the low watermark so the items exist up
	   front */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3780
3781 /*
3782 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3783 * if there is IO pending and if that IO could possibly be done for a
3784 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3785 * otherwise.
3786 *
3787 */
3788
3789 int
3790 rf_buf_queue_check(int raidid)
3791 {
3792 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3793 raidPtrs[raidid]->openings > 0) {
3794 /* there is work to do */
3795 return 0;
3796 }
3797 /* default is nothing to do */
3798 return 1;
3799 }
3800
/*
 * Determine the size and sector size of a component device.
 *
 * First try DIOCGPART (a disklabel partition); if that fails, fall
 * back to DIOCGWEDGEINFO (a dk(4) wedge).  In both cases numBlocks is
 * the usable size: the raw size minus rf_protectedSectors
 * (presumably the area reserved for the component label -- confirm
 * against rf_protectedSectors' definition).  Returns 0 on success or
 * the error from the last ioctl attempted.
 */
int
rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
{
	struct partinfo dpart;
	struct dkwedge_info dkw;
	int error;

	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
	if (error == 0) {
		diskPtr->blockSize = dpart.disklab->d_secsize;
		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
		diskPtr->partitionSize = dpart.part->p_size;
		return 0;
	}

	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
	if (error == 0) {
		struct disk *pdk;

		/* derive the sector size from the parent disk if we
		   can find it; otherwise assume 512 bytes */
		if ((pdk = disk_find(dkw.dkw_parent)) != NULL)
			diskPtr->blockSize = DEV_BSIZE << pdk->dk_blkshift;
		else
			diskPtr->blockSize = 512;	/* XXX */
		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
		diskPtr->partitionSize = dkw.dkw_size;
		return 0;
	}
	return error;
}
3830
/*
 * Autoconf match routine: unconditionally matches (no hardware to
 * probe for).
 */
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	return 1;
}
3837
/*
 * Autoconf attach routine.  Intentionally empty: the real per-unit
 * setup appears to happen elsewhere (e.g. raidinit(), not visible in
 * this chunk) -- NOTE(review): confirm.
 */
static void
raid_attach(struct device *parent, struct device *self,
    void *aux)
{

}
3844
3845
3846 static int
3847 raid_detach(struct device *self, int flags)
3848 {
3849 struct raid_softc *rs = (struct raid_softc *)self;
3850
3851 if (rs->sc_flags & RAIDF_INITED)
3852 return EBUSY;
3853
3854 return 0;
3855 }
3856
/*
 * Publish a synthetic disk geometry for the raid unit via proplib.
 * The "geometry" values are fabricated from the array layout (one
 * stripe of data per track, 4*numCol tracks per cylinder) rather than
 * describing real hardware.  The new disk-info dictionary replaces
 * any previous one, whose reference is dropped.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info retains geom; drop our reference to geom */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3886
3887 /*
3888 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3889 * We end up returning whatever error was returned by the first cache flush
3890 * that fails.
3891 */
3892
3893 int
3894 rf_sync_component_caches(RF_Raid_t *raidPtr)
3895 {
3896 int c, sparecol;
3897 int e,error;
3898 int force = 1;
3899
3900 error = 0;
3901 for (c = 0; c < raidPtr->numCol; c++) {
3902 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3903 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3904 &force, FWRITE, NOCRED);
3905 if (e) {
3906 if (e != ENODEV)
3907 printf("raid%d: cache flush to component %s failed.\n",
3908 raidPtr->raidid, raidPtr->Disks[c].devname);
3909 if (error == 0) {
3910 error = e;
3911 }
3912 }
3913 }
3914 }
3915
3916 for( c = 0; c < raidPtr->numSpare ; c++) {
3917 sparecol = raidPtr->numCol + c;
3918 /* Need to ensure that the reconstruct actually completed! */
3919 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3920 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3921 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3922 if (e) {
3923 if (e != ENODEV)
3924 printf("raid%d: cache flush to component %s failed.\n",
3925 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3926 if (error == 0) {
3927 error = e;
3928 }
3929 }
3930 }
3931 }
3932 return error;
3933 }
3934