/*	$NetBSD: rf_netbsdkintf.c,v 1.294.2.3 2012/10/30 17:21:59 yamt Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988 University of Utah.
34 * Copyright (c) 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: cd.c 1.6 90/11/28$
66 *
67 * @(#)cd.c 8.2 (Berkeley) 11/16/93
68 */
69
70 /*
71 * Copyright (c) 1995 Carnegie-Mellon University.
72 * All rights reserved.
73 *
74 * Authors: Mark Holland, Jim Zelenka
75 *
76 * Permission to use, copy, modify and distribute this software and
77 * its documentation is hereby granted, provided that both the copyright
78 * notice and this permission notice appear in all copies of the
79 * software, derivative works or modified versions, and any portions
80 * thereof, and that both notices appear in supporting documentation.
81 *
82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85 *
86 * Carnegie Mellon requests users of this software to return to
87 *
88 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
89 * School of Computer Science
90 * Carnegie Mellon University
91 * Pittsburgh PA 15213-3890
92 *
93 * any improvements or extensions that they make and grant Carnegie the
94 * rights to redistribute these changes.
95 */
96
97 /***********************************************************
98 *
99 * rf_kintf.c -- the kernel interface routines for RAIDframe
100 *
101 ***********************************************************/
102
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.294.2.3 2012/10/30 17:21:59 yamt Exp $");
105
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130
131 #include <prop/proplib.h>
132
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154
155 #ifdef DEBUG
156 int rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else /* DEBUG */
159 #define db1_printf(a) { }
160 #endif /* DEBUG */
161
162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
163
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173 #endif
174
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181 void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190 daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192 daddr_t, daddr_t, int);
193
194 static int raidwrite_component_label(unsigned,
195 dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197 dev_t, struct vnode *, RF_ComponentLabel_t *);
198
199
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208
209 const struct bdevsw raid_bdevsw = {
210 raidopen, raidclose, raidstrategy, raidioctl,
211 raiddump, raidsize, D_DISK
212 };
213
214 const struct cdevsw raid_cdevsw = {
215 raidopen, raidclose, raidread, raidwrite, raidioctl,
216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218
219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220
221 /* XXX Not sure if the following should be replacing the raidPtrs above,
222 or if it should be used in conjunction with that...
223 */
224
225 struct raid_softc {
226 device_t sc_dev;
227 int sc_flags; /* flags */
228 int sc_cflags; /* configuration flags */
229 uint64_t sc_size; /* size of the raid device */
230 char sc_xname[20]; /* XXX external name */
231 struct disk sc_dkdev; /* generic disk device info */
232 struct bufq_state *buf_queue; /* used for the device queue */
233 };
234 /* sc_flags */
235 #define RAIDF_INITED 0x01 /* unit has been initialized */
236 #define RAIDF_WLABEL 0x02 /* label area is writable */
237 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
238 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */
239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED 0x80 /* unit is locked */
241
242 #define raidunit(x) DISKUNIT(x)
243 int numraid = 0;
244
245 extern struct cfdriver raid_cd;
246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247 raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248 DVF_DETACH_SHUTDOWN);
249
250 /*
251 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252 * Be aware that large numbers can allow the driver to consume a lot of
253 * kernel memory, especially on writes, and in degraded mode reads.
254 *
255 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256 * a single 64K write will typically require 64K for the old data,
257 * 64K for the old parity, and 64K for the new parity, for a total
258 * of 192K (if the parity buffer is not re-used immediately).
259 * Even it if is used immediately, that's still 128K, which when multiplied
260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261 *
262 * Now in degraded mode, for example, a 64K read on the above setup may
263 * require data reconstruction, which will require *all* of the 4 remaining
264 * disks to participate -- 4 * 32K/disk == 128K again.
265 */
266
267 #ifndef RAIDOUTSTANDING
268 #define RAIDOUTSTANDING 6
269 #endif
270
271 #define RAIDLABELDEV(dev) \
272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276
277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278 struct disklabel *);
279 static void raidgetdisklabel(dev_t);
280 static void raidmakedisklabel(struct raid_softc *);
281
282 static int raidlock(struct raid_softc *);
283 static void raidunlock(struct raid_softc *);
284
285 static int raid_detach_unlocked(struct raid_softc *);
286
287 static void rf_markalldirty(RF_Raid_t *);
288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289
290 void rf_ReconThread(struct rf_recon_req *);
291 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292 void rf_CopybackThread(RF_Raid_t *raidPtr);
293 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294 int rf_autoconfig(device_t);
295 void rf_buildroothack(RF_ConfigSet_t *);
296
297 RF_AutoConfig_t *rf_find_raid_components(void);
298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302 int rf_set_autoconfig(RF_Raid_t *, int);
303 int rf_set_rootpartition(RF_Raid_t *, int);
304 void rf_release_all_vps(RF_ConfigSet_t *);
305 void rf_cleanup_config_set(RF_ConfigSet_t *);
306 int rf_have_enough_components(RF_ConfigSet_t *);
307 int rf_auto_config_set(RF_ConfigSet_t *, int *);
308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309
310 /*
311 * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
312 * Note that this is overridden by having RAID_AUTOCONFIG as an option
313 * in the kernel config file.
314 */
315 #ifdef RAID_AUTOCONFIG
316 int raidautoconfig = 1;
317 #else
318 int raidautoconfig = 0;
319 #endif
320 static bool raidautoconfigdone = false;
321
322 struct RF_Pools_s rf_pools;
323
324 void
325 raidattach(int num)
326 {
327 int raidID;
328 int i, rc;
329
330 aprint_debug("raidattach: Asked for %d units\n", num);
331
332 if (num <= 0) {
333 #ifdef DIAGNOSTIC
334 panic("raidattach: count <= 0");
335 #endif
336 return;
337 }
338 /* This is where all the initialization stuff gets done. */
339
340 numraid = num;
341
342 /* Make some space for requested number of units... */
343
344 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
345 if (raidPtrs == NULL) {
346 panic("raidPtrs is NULL!!");
347 }
348
349 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
350 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
351 rf_init_cond2(rf_sparet_wait_cv, "sparetw");
352 rf_init_cond2(rf_sparet_resp_cv, "rfgst");
353
354 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
355 #endif
356
357 for (i = 0; i < num; i++)
358 raidPtrs[i] = NULL;
359 rc = rf_BootRaidframe();
360 if (rc == 0)
361 aprint_verbose("Kernelized RAIDframe activated\n");
362 else
363 panic("Serious error booting RAID!!");
364
365 /* put together some datastructures like the CCD device does.. This
366 * lets us lock the device and what-not when it gets opened. */
367
368 raid_softc = (struct raid_softc *)
369 malloc(num * sizeof(struct raid_softc),
370 M_RAIDFRAME, M_NOWAIT);
371 if (raid_softc == NULL) {
372 aprint_error("WARNING: no memory for RAIDframe driver\n");
373 return;
374 }
375
376 memset(raid_softc, 0, num * sizeof(struct raid_softc));
377
378 for (raidID = 0; raidID < num; raidID++) {
379 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
380
381 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
382 (RF_Raid_t *));
383 if (raidPtrs[raidID] == NULL) {
384 aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
385 numraid = raidID;
386 return;
387 }
388 }
389
390 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
391 aprint_error("raidattach: config_cfattach_attach failed?\n");
392 }
393
394 raidautoconfigdone = false;
395
396 /*
397 * Register a finalizer which will be used to auto-config RAID
398 * sets once all real hardware devices have been found.
399 */
400 if (config_finalize_register(NULL, rf_autoconfig) != 0)
401 aprint_error("WARNING: unable to register RAIDframe finalizer\n");
402 }
403
404 int
405 rf_autoconfig(device_t self)
406 {
407 RF_AutoConfig_t *ac_list;
408 RF_ConfigSet_t *config_sets;
409
410 if (!raidautoconfig || raidautoconfigdone == true)
411 return (0);
412
413 /* XXX This code can only be run once. */
414 raidautoconfigdone = true;
415
416 /* 1. locate all RAID components on the system */
417 aprint_debug("Searching for RAID components...\n");
418 ac_list = rf_find_raid_components();
419
420 /* 2. Sort them into their respective sets. */
421 config_sets = rf_create_auto_sets(ac_list);
422
423 /*
424 * 3. Evaluate each set andconfigure the valid ones.
425 * This gets done in rf_buildroothack().
426 */
427 rf_buildroothack(config_sets);
428
429 return 1;
430 }
431
/*
 * rf_buildroothack: configure every eligible auto-config set, then try
 * to point booted_device at a RAID set when exactly one rootable set
 * exists (or when exactly one configured set contains the device the
 * kernel booted from).  Consumes and frees the config_sets list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; grab next first. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		if (raid_softc[rootID].sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition? */
			/* Wedges exist: guess the 'a' wedge by name. */
			char cname[sizeof(cset->ac->devname)];
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(raid_softc[rootID].sc_dev), 'a');
			booted_device = dkwedge_find_by_wname(cname);
		} else
			booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help.  If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/*
		 * Several rootable sets: narrow the choice to sets
		 * that actually contain the boot device as a component.
		 */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				/* Skip the "/dev/" prefix of the stored name. */
				devname = raidPtrs[raidID]->Disks[col].devname;
				devname += sizeof("/dev/") - 1;
				/*
				 * NOTE(review): strncmp with the boot device's
				 * name length is a prefix match, so e.g. "wd1"
				 * also matches component "wd10" — presumably
				 * acceptable here; confirm if exact matching
				 * is required.
				 */
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
					     raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
533
534
535 int
536 raidsize(dev_t dev)
537 {
538 struct raid_softc *rs;
539 struct disklabel *lp;
540 int part, unit, omask, size;
541
542 unit = raidunit(dev);
543 if (unit >= numraid)
544 return (-1);
545 rs = &raid_softc[unit];
546
547 if ((rs->sc_flags & RAIDF_INITED) == 0)
548 return (-1);
549
550 part = DISKPART(dev);
551 omask = rs->sc_dkdev.dk_openmask & (1 << part);
552 lp = rs->sc_dkdev.dk_label;
553
554 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
555 return (-1);
556
557 if (lp->d_partitions[part].p_fstype != FS_SWAP)
558 size = -1;
559 else
560 size = lp->d_partitions[part].p_size *
561 (lp->d_secsize / DEV_BSIZE);
562
563 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
564 return (-1);
565
566 return (size);
567
568 }
569
/*
 * raiddump: bdevsw d_dump entry point.  Write a crash dump directly to
 * one live component of a RAID 1 set (the only level supported for
 * dumps), bypassing the RAIDframe I/O path entirely.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* Dumps must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run past the end of the device.
	 * NOTE(review): blkno is daddr_t (signed) but is printed with
	 * PRIu64 — harmless for valid block numbers, but worth confirming. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
707 /* ARGSUSED */
/*
 * raidopen: open entry point (both block and character).
 *
 * Validates the unit and partition, reads the disklabel on first open
 * of a configured set, records the open in the appropriate open mask,
 * and marks all components dirty on the first open so an unclean
 * shutdown is detectable.  Note the success path falls through the
 * "bad:" label with error == 0.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Unit is on its way out; refuse new opens. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open value here, so this
	 * condition detects the very first open of the unit. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
793 /* ARGSUSED */
794 int
795 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
796 {
797 int unit = raidunit(dev);
798 struct raid_softc *rs;
799 int error = 0;
800 int part;
801
802 if (unit >= numraid)
803 return (ENXIO);
804 rs = &raid_softc[unit];
805
806 if ((error = raidlock(rs)) != 0)
807 return (error);
808
809 part = DISKPART(dev);
810
811 /* ...that much closer to allowing unconfiguration... */
812 switch (fmt) {
813 case S_IFCHR:
814 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
815 break;
816
817 case S_IFBLK:
818 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
819 break;
820 }
821 rs->sc_dkdev.dk_openmask =
822 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
823
824 if ((rs->sc_dkdev.dk_openmask == 0) &&
825 ((rs->sc_flags & RAIDF_INITED) != 0)) {
826 /* Last one... device is not unconfigured yet.
827 Device shutdown has taken care of setting the
828 clean bits if RAIDF_INITED is not set
829 mark things as clean... */
830
831 rf_update_component_labels(raidPtrs[unit],
832 RF_FINAL_COMPONENT_UPDATE);
833
834 /* If the kernel is shutting down, it will detach
835 * this RAID set soon enough.
836 */
837 }
838
839 raidunlock(rs);
840 return (0);
841
842 }
843
844 void
845 raidstrategy(struct buf *bp)
846 {
847 unsigned int raidID = raidunit(bp->b_dev);
848 RF_Raid_t *raidPtr;
849 struct raid_softc *rs = &raid_softc[raidID];
850 int wlabel;
851
852 if ((rs->sc_flags & RAIDF_INITED) ==0) {
853 bp->b_error = ENXIO;
854 goto done;
855 }
856 if (raidID >= numraid || !raidPtrs[raidID]) {
857 bp->b_error = ENODEV;
858 goto done;
859 }
860 raidPtr = raidPtrs[raidID];
861 if (!raidPtr->valid) {
862 bp->b_error = ENODEV;
863 goto done;
864 }
865 if (bp->b_bcount == 0) {
866 db1_printf(("b_bcount is zero..\n"));
867 goto done;
868 }
869
870 /*
871 * Do bounds checking and adjust transfer. If there's an
872 * error, the bounds check will flag that for us.
873 */
874
875 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
876 if (DISKPART(bp->b_dev) == RAW_PART) {
877 uint64_t size; /* device size in DEV_BSIZE unit */
878
879 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
880 size = raidPtr->totalSectors <<
881 (raidPtr->logBytesPerSector - DEV_BSHIFT);
882 } else {
883 size = raidPtr->totalSectors >>
884 (DEV_BSHIFT - raidPtr->logBytesPerSector);
885 }
886 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
887 goto done;
888 }
889 } else {
890 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
891 db1_printf(("Bounds check failed!!:%d %d\n",
892 (int) bp->b_blkno, (int) wlabel));
893 goto done;
894 }
895 }
896
897 rf_lock_mutex2(raidPtr->iodone_lock);
898
899 bp->b_resid = 0;
900
901 /* stuff it onto our queue */
902 bufq_put(rs->buf_queue, bp);
903
904 /* scheduled the IO to happen at the next convenient time */
905 rf_signal_cond2(raidPtr->iodone_cv);
906 rf_unlock_mutex2(raidPtr->iodone_lock);
907
908 return;
909
910 done:
911 bp->b_resid = bp->b_bcount;
912 biodone(bp);
913 }
914 /* ARGSUSED */
915 int
916 raidread(dev_t dev, struct uio *uio, int flags)
917 {
918 int unit = raidunit(dev);
919 struct raid_softc *rs;
920
921 if (unit >= numraid)
922 return (ENXIO);
923 rs = &raid_softc[unit];
924
925 if ((rs->sc_flags & RAIDF_INITED) == 0)
926 return (ENXIO);
927
928 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
929
930 }
931 /* ARGSUSED */
932 int
933 raidwrite(dev_t dev, struct uio *uio, int flags)
934 {
935 int unit = raidunit(dev);
936 struct raid_softc *rs;
937
938 if (unit >= numraid)
939 return (ENXIO);
940 rs = &raid_softc[unit];
941
942 if ((rs->sc_flags & RAIDF_INITED) == 0)
943 return (ENXIO);
944
945 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
946
947 }
948
949 static int
950 raid_detach_unlocked(struct raid_softc *rs)
951 {
952 int error;
953 RF_Raid_t *raidPtr;
954
955 raidPtr = raidPtrs[device_unit(rs->sc_dev)];
956
957 /*
958 * If somebody has a partition mounted, we shouldn't
959 * shutdown.
960 */
961 if (rs->sc_dkdev.dk_openmask != 0)
962 return EBUSY;
963
964 if ((rs->sc_flags & RAIDF_INITED) == 0)
965 ; /* not initialized: nothing to do */
966 else if ((error = rf_Shutdown(raidPtr)) != 0)
967 return error;
968 else
969 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
970
971 /* Detach the disk. */
972 dkwedge_delall(&rs->sc_dkdev);
973 disk_detach(&rs->sc_dkdev);
974 disk_destroy(&rs->sc_dkdev);
975
976 aprint_normal_dev(rs->sc_dev, "detached\n");
977
978 return 0;
979 }
980
981 int
982 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
983 {
984 int unit = raidunit(dev);
985 int error = 0;
986 int part, pmask, s;
987 cfdata_t cf;
988 struct raid_softc *rs;
989 RF_Config_t *k_cfg, *u_cfg;
990 RF_Raid_t *raidPtr;
991 RF_RaidDisk_t *diskPtr;
992 RF_AccTotals_t *totals;
993 RF_DeviceConfig_t *d_cfg, **ucfgp;
994 u_char *specific_buf;
995 int retcode = 0;
996 int column;
997 /* int raidid; */
998 struct rf_recon_req *rrcopy, *rr;
999 RF_ComponentLabel_t *clabel;
1000 RF_ComponentLabel_t *ci_label;
1001 RF_ComponentLabel_t **clabel_ptr;
1002 RF_SingleComponent_t *sparePtr,*componentPtr;
1003 RF_SingleComponent_t component;
1004 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1005 int i, j, d;
1006 #ifdef __HAVE_OLD_DISKLABEL
1007 struct disklabel newlabel;
1008 #endif
1009 struct dkwedge_info *dkw;
1010
1011 if (unit >= numraid)
1012 return (ENXIO);
1013 rs = &raid_softc[unit];
1014 raidPtr = raidPtrs[unit];
1015
1016 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1017 (int) DISKPART(dev), (int) unit, cmd));
1018
1019 /* Must be open for writes for these commands... */
1020 switch (cmd) {
1021 #ifdef DIOCGSECTORSIZE
1022 case DIOCGSECTORSIZE:
1023 *(u_int *)data = raidPtr->bytesPerSector;
1024 return 0;
1025 case DIOCGMEDIASIZE:
1026 *(off_t *)data =
1027 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1028 return 0;
1029 #endif
1030 case DIOCSDINFO:
1031 case DIOCWDINFO:
1032 #ifdef __HAVE_OLD_DISKLABEL
1033 case ODIOCWDINFO:
1034 case ODIOCSDINFO:
1035 #endif
1036 case DIOCWLABEL:
1037 case DIOCAWEDGE:
1038 case DIOCDWEDGE:
1039 case DIOCSSTRATEGY:
1040 if ((flag & FWRITE) == 0)
1041 return (EBADF);
1042 }
1043
1044 /* Must be initialized for these... */
1045 switch (cmd) {
1046 case DIOCGDINFO:
1047 case DIOCSDINFO:
1048 case DIOCWDINFO:
1049 #ifdef __HAVE_OLD_DISKLABEL
1050 case ODIOCGDINFO:
1051 case ODIOCWDINFO:
1052 case ODIOCSDINFO:
1053 case ODIOCGDEFLABEL:
1054 #endif
1055 case DIOCGPART:
1056 case DIOCWLABEL:
1057 case DIOCGDEFLABEL:
1058 case DIOCAWEDGE:
1059 case DIOCDWEDGE:
1060 case DIOCLWEDGES:
1061 case DIOCCACHESYNC:
1062 case RAIDFRAME_SHUTDOWN:
1063 case RAIDFRAME_REWRITEPARITY:
1064 case RAIDFRAME_GET_INFO:
1065 case RAIDFRAME_RESET_ACCTOTALS:
1066 case RAIDFRAME_GET_ACCTOTALS:
1067 case RAIDFRAME_KEEP_ACCTOTALS:
1068 case RAIDFRAME_GET_SIZE:
1069 case RAIDFRAME_FAIL_DISK:
1070 case RAIDFRAME_COPYBACK:
1071 case RAIDFRAME_CHECK_RECON_STATUS:
1072 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1073 case RAIDFRAME_GET_COMPONENT_LABEL:
1074 case RAIDFRAME_SET_COMPONENT_LABEL:
1075 case RAIDFRAME_ADD_HOT_SPARE:
1076 case RAIDFRAME_REMOVE_HOT_SPARE:
1077 case RAIDFRAME_INIT_LABELS:
1078 case RAIDFRAME_REBUILD_IN_PLACE:
1079 case RAIDFRAME_CHECK_PARITY:
1080 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1081 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1082 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1083 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1084 case RAIDFRAME_SET_AUTOCONFIG:
1085 case RAIDFRAME_SET_ROOT:
1086 case RAIDFRAME_DELETE_COMPONENT:
1087 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1088 case RAIDFRAME_PARITYMAP_STATUS:
1089 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1090 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1091 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1092 case DIOCGSTRATEGY:
1093 case DIOCSSTRATEGY:
1094 if ((rs->sc_flags & RAIDF_INITED) == 0)
1095 return (ENXIO);
1096 }
1097
1098 switch (cmd) {
1099 #ifdef COMPAT_50
1100 case RAIDFRAME_GET_INFO50:
1101 return rf_get_info50(raidPtr, data);
1102
1103 case RAIDFRAME_CONFIGURE50:
1104 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1105 return retcode;
1106 goto config;
1107 #endif
1108 /* configure the system */
1109 case RAIDFRAME_CONFIGURE:
1110
1111 if (raidPtr->valid) {
1112 /* There is a valid RAID set running on this unit! */
1113 printf("raid%d: Device already configured!\n",unit);
1114 return(EINVAL);
1115 }
1116
1117 /* copy-in the configuration information */
1118 /* data points to a pointer to the configuration structure */
1119
1120 u_cfg = *((RF_Config_t **) data);
1121 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1122 if (k_cfg == NULL) {
1123 return (ENOMEM);
1124 }
1125 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1126 if (retcode) {
1127 RF_Free(k_cfg, sizeof(RF_Config_t));
1128 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1129 retcode));
1130 return (retcode);
1131 }
1132 goto config;
1133 config:
1134 /* allocate a buffer for the layout-specific data, and copy it
1135 * in */
1136 if (k_cfg->layoutSpecificSize) {
1137 if (k_cfg->layoutSpecificSize > 10000) {
1138 /* sanity check */
1139 RF_Free(k_cfg, sizeof(RF_Config_t));
1140 return (EINVAL);
1141 }
1142 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1143 (u_char *));
1144 if (specific_buf == NULL) {
1145 RF_Free(k_cfg, sizeof(RF_Config_t));
1146 return (ENOMEM);
1147 }
1148 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1149 k_cfg->layoutSpecificSize);
1150 if (retcode) {
1151 RF_Free(k_cfg, sizeof(RF_Config_t));
1152 RF_Free(specific_buf,
1153 k_cfg->layoutSpecificSize);
1154 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1155 retcode));
1156 return (retcode);
1157 }
1158 } else
1159 specific_buf = NULL;
1160 k_cfg->layoutSpecific = specific_buf;
1161
1162 /* should do some kind of sanity check on the configuration.
1163 * Store the sum of all the bytes in the last byte? */
1164
1165 /* configure the system */
1166
1167 /*
1168 * Clear the entire RAID descriptor, just to make sure
1169 * there is no stale data left in the case of a
1170 * reconfiguration
1171 */
1172 memset(raidPtr, 0, sizeof(*raidPtr));
1173 raidPtr->raidid = unit;
1174
1175 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1176
1177 if (retcode == 0) {
1178
1179 /* allow this many simultaneous IO's to
1180 this RAID device */
1181 raidPtr->openings = RAIDOUTSTANDING;
1182
1183 raidinit(raidPtr);
1184 rf_markalldirty(raidPtr);
1185 }
1186 /* free the buffers. No return code here. */
1187 if (k_cfg->layoutSpecificSize) {
1188 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1189 }
1190 RF_Free(k_cfg, sizeof(RF_Config_t));
1191
1192 return (retcode);
1193
1194 /* shutdown the system */
1195 case RAIDFRAME_SHUTDOWN:
1196
1197 part = DISKPART(dev);
1198 pmask = (1 << part);
1199
1200 if ((error = raidlock(rs)) != 0)
1201 return (error);
1202
1203 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1204 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1205 (rs->sc_dkdev.dk_copenmask & pmask)))
1206 retcode = EBUSY;
1207 else {
1208 rs->sc_flags |= RAIDF_SHUTDOWN;
1209 rs->sc_dkdev.dk_copenmask &= ~pmask;
1210 rs->sc_dkdev.dk_bopenmask &= ~pmask;
1211 rs->sc_dkdev.dk_openmask &= ~pmask;
1212 retcode = 0;
1213 }
1214
1215 raidunlock(rs);
1216
1217 if (retcode != 0)
1218 return retcode;
1219
1220 /* free the pseudo device attach bits */
1221
1222 cf = device_cfdata(rs->sc_dev);
1223 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1224 free(cf, M_RAIDFRAME);
1225
1226 return (retcode);
1227 case RAIDFRAME_GET_COMPONENT_LABEL:
1228 clabel_ptr = (RF_ComponentLabel_t **) data;
1229 /* need to read the component label for the disk indicated
1230 by row,column in clabel */
1231
1232 /*
1233 * Perhaps there should be an option to skip the in-core
1234 * copy and hit the disk, as with disklabel(8).
1235 */
1236 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1237
1238 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1239
1240 if (retcode) {
1241 RF_Free(clabel, sizeof(*clabel));
1242 return retcode;
1243 }
1244
1245 clabel->row = 0; /* Don't allow looking at anything else.*/
1246
1247 column = clabel->column;
1248
1249 if ((column < 0) || (column >= raidPtr->numCol +
1250 raidPtr->numSpare)) {
1251 RF_Free(clabel, sizeof(*clabel));
1252 return EINVAL;
1253 }
1254
1255 RF_Free(clabel, sizeof(*clabel));
1256
1257 clabel = raidget_component_label(raidPtr, column);
1258
1259 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1260
1261 #if 0
1262 case RAIDFRAME_SET_COMPONENT_LABEL:
1263 clabel = (RF_ComponentLabel_t *) data;
1264
1265 /* XXX check the label for valid stuff... */
1266 /* Note that some things *should not* get modified --
1267 the user should be re-initing the labels instead of
1268 trying to patch things.
1269 */
1270
1271 raidid = raidPtr->raidid;
1272 #ifdef DEBUG
1273 printf("raid%d: Got component label:\n", raidid);
1274 printf("raid%d: Version: %d\n", raidid, clabel->version);
1275 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1276 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1277 printf("raid%d: Column: %d\n", raidid, clabel->column);
1278 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1279 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1280 printf("raid%d: Status: %d\n", raidid, clabel->status);
1281 #endif
1282 clabel->row = 0;
1283 column = clabel->column;
1284
1285 if ((column < 0) || (column >= raidPtr->numCol)) {
1286 return(EINVAL);
1287 }
1288
1289 /* XXX this isn't allowed to do anything for now :-) */
1290
1291 /* XXX and before it is, we need to fill in the rest
1292 of the fields!?!?!?! */
1293 memcpy(raidget_component_label(raidPtr, column),
1294 clabel, sizeof(*clabel));
1295 raidflush_component_label(raidPtr, column);
1296 return (0);
1297 #endif
1298
1299 case RAIDFRAME_INIT_LABELS:
1300 clabel = (RF_ComponentLabel_t *) data;
1301 /*
1302 we only want the serial number from
1303 the above. We get all the rest of the information
1304 from the config that was used to create this RAID
1305 set.
1306 */
1307
1308 raidPtr->serial_number = clabel->serial_number;
1309
1310 for(column=0;column<raidPtr->numCol;column++) {
1311 diskPtr = &raidPtr->Disks[column];
1312 if (!RF_DEAD_DISK(diskPtr->status)) {
1313 ci_label = raidget_component_label(raidPtr,
1314 column);
1315 /* Zeroing this is important. */
1316 memset(ci_label, 0, sizeof(*ci_label));
1317 raid_init_component_label(raidPtr, ci_label);
1318 ci_label->serial_number =
1319 raidPtr->serial_number;
1320 ci_label->row = 0; /* we dont' pretend to support more */
1321 rf_component_label_set_partitionsize(ci_label,
1322 diskPtr->partitionSize);
1323 ci_label->column = column;
1324 raidflush_component_label(raidPtr, column);
1325 }
1326 /* XXXjld what about the spares? */
1327 }
1328
1329 return (retcode);
1330 case RAIDFRAME_SET_AUTOCONFIG:
1331 d = rf_set_autoconfig(raidPtr, *(int *) data);
1332 printf("raid%d: New autoconfig value is: %d\n",
1333 raidPtr->raidid, d);
1334 *(int *) data = d;
1335 return (retcode);
1336
1337 case RAIDFRAME_SET_ROOT:
1338 d = rf_set_rootpartition(raidPtr, *(int *) data);
1339 printf("raid%d: New rootpartition value is: %d\n",
1340 raidPtr->raidid, d);
1341 *(int *) data = d;
1342 return (retcode);
1343
1344 /* initialize all parity */
1345 case RAIDFRAME_REWRITEPARITY:
1346
1347 if (raidPtr->Layout.map->faultsTolerated == 0) {
1348 /* Parity for RAID 0 is trivially correct */
1349 raidPtr->parity_good = RF_RAID_CLEAN;
1350 return(0);
1351 }
1352
1353 if (raidPtr->parity_rewrite_in_progress == 1) {
1354 /* Re-write is already in progress! */
1355 return(EINVAL);
1356 }
1357
1358 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1359 rf_RewriteParityThread,
1360 raidPtr,"raid_parity");
1361 return (retcode);
1362
1363
1364 case RAIDFRAME_ADD_HOT_SPARE:
1365 sparePtr = (RF_SingleComponent_t *) data;
1366 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1367 retcode = rf_add_hot_spare(raidPtr, &component);
1368 return(retcode);
1369
1370 case RAIDFRAME_REMOVE_HOT_SPARE:
1371 return(retcode);
1372
1373 case RAIDFRAME_DELETE_COMPONENT:
1374 componentPtr = (RF_SingleComponent_t *)data;
1375 memcpy( &component, componentPtr,
1376 sizeof(RF_SingleComponent_t));
1377 retcode = rf_delete_component(raidPtr, &component);
1378 return(retcode);
1379
1380 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1381 componentPtr = (RF_SingleComponent_t *)data;
1382 memcpy( &component, componentPtr,
1383 sizeof(RF_SingleComponent_t));
1384 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1385 return(retcode);
1386
1387 case RAIDFRAME_REBUILD_IN_PLACE:
1388
1389 if (raidPtr->Layout.map->faultsTolerated == 0) {
1390 /* Can't do this on a RAID 0!! */
1391 return(EINVAL);
1392 }
1393
1394 if (raidPtr->recon_in_progress == 1) {
1395 /* a reconstruct is already in progress! */
1396 return(EINVAL);
1397 }
1398
1399 componentPtr = (RF_SingleComponent_t *) data;
1400 memcpy( &component, componentPtr,
1401 sizeof(RF_SingleComponent_t));
1402 component.row = 0; /* we don't support any more */
1403 column = component.column;
1404
1405 if ((column < 0) || (column >= raidPtr->numCol)) {
1406 return(EINVAL);
1407 }
1408
1409 rf_lock_mutex2(raidPtr->mutex);
1410 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1411 (raidPtr->numFailures > 0)) {
1412 /* XXX 0 above shouldn't be constant!!! */
1413 /* some component other than this has failed.
1414 Let's not make things worse than they already
1415 are... */
1416 printf("raid%d: Unable to reconstruct to disk at:\n",
1417 raidPtr->raidid);
1418 printf("raid%d: Col: %d Too many failures.\n",
1419 raidPtr->raidid, column);
1420 rf_unlock_mutex2(raidPtr->mutex);
1421 return (EINVAL);
1422 }
1423 if (raidPtr->Disks[column].status ==
1424 rf_ds_reconstructing) {
1425 printf("raid%d: Unable to reconstruct to disk at:\n",
1426 raidPtr->raidid);
1427 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1428
1429 rf_unlock_mutex2(raidPtr->mutex);
1430 return (EINVAL);
1431 }
1432 if (raidPtr->Disks[column].status == rf_ds_spared) {
1433 rf_unlock_mutex2(raidPtr->mutex);
1434 return (EINVAL);
1435 }
1436 rf_unlock_mutex2(raidPtr->mutex);
1437
1438 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1439 if (rrcopy == NULL)
1440 return(ENOMEM);
1441
1442 rrcopy->raidPtr = (void *) raidPtr;
1443 rrcopy->col = column;
1444
1445 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1446 rf_ReconstructInPlaceThread,
1447 rrcopy,"raid_reconip");
1448 return(retcode);
1449
1450 case RAIDFRAME_GET_INFO:
1451 if (!raidPtr->valid)
1452 return (ENODEV);
1453 ucfgp = (RF_DeviceConfig_t **) data;
1454 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1455 (RF_DeviceConfig_t *));
1456 if (d_cfg == NULL)
1457 return (ENOMEM);
1458 d_cfg->rows = 1; /* there is only 1 row now */
1459 d_cfg->cols = raidPtr->numCol;
1460 d_cfg->ndevs = raidPtr->numCol;
1461 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1462 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1463 return (ENOMEM);
1464 }
1465 d_cfg->nspares = raidPtr->numSpare;
1466 if (d_cfg->nspares >= RF_MAX_DISKS) {
1467 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1468 return (ENOMEM);
1469 }
1470 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1471 d = 0;
1472 for (j = 0; j < d_cfg->cols; j++) {
1473 d_cfg->devs[d] = raidPtr->Disks[j];
1474 d++;
1475 }
1476 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1477 d_cfg->spares[i] = raidPtr->Disks[j];
1478 }
1479 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1480 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1481
1482 return (retcode);
1483
1484 case RAIDFRAME_CHECK_PARITY:
1485 *(int *) data = raidPtr->parity_good;
1486 return (0);
1487
1488 case RAIDFRAME_PARITYMAP_STATUS:
1489 if (rf_paritymap_ineligible(raidPtr))
1490 return EINVAL;
1491 rf_paritymap_status(raidPtr->parity_map,
1492 (struct rf_pmstat *)data);
1493 return 0;
1494
1495 case RAIDFRAME_PARITYMAP_SET_PARAMS:
1496 if (rf_paritymap_ineligible(raidPtr))
1497 return EINVAL;
1498 if (raidPtr->parity_map == NULL)
1499 return ENOENT; /* ??? */
1500 if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1501 (struct rf_pmparams *)data, 1))
1502 return EINVAL;
1503 return 0;
1504
1505 case RAIDFRAME_PARITYMAP_GET_DISABLE:
1506 if (rf_paritymap_ineligible(raidPtr))
1507 return EINVAL;
1508 *(int *) data = rf_paritymap_get_disable(raidPtr);
1509 return 0;
1510
1511 case RAIDFRAME_PARITYMAP_SET_DISABLE:
1512 if (rf_paritymap_ineligible(raidPtr))
1513 return EINVAL;
1514 rf_paritymap_set_disable(raidPtr, *(int *)data);
1515 /* XXX should errors be passed up? */
1516 return 0;
1517
1518 case RAIDFRAME_RESET_ACCTOTALS:
1519 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1520 return (0);
1521
1522 case RAIDFRAME_GET_ACCTOTALS:
1523 totals = (RF_AccTotals_t *) data;
1524 *totals = raidPtr->acc_totals;
1525 return (0);
1526
1527 case RAIDFRAME_KEEP_ACCTOTALS:
1528 raidPtr->keep_acc_totals = *(int *)data;
1529 return (0);
1530
1531 case RAIDFRAME_GET_SIZE:
1532 *(int *) data = raidPtr->totalSectors;
1533 return (0);
1534
1535 /* fail a disk & optionally start reconstruction */
1536 case RAIDFRAME_FAIL_DISK:
1537
1538 if (raidPtr->Layout.map->faultsTolerated == 0) {
1539 /* Can't do this on a RAID 0!! */
1540 return(EINVAL);
1541 }
1542
1543 rr = (struct rf_recon_req *) data;
1544 rr->row = 0;
1545 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1546 return (EINVAL);
1547
1548
1549 rf_lock_mutex2(raidPtr->mutex);
1550 if (raidPtr->status == rf_rs_reconstructing) {
1551 /* you can't fail a disk while we're reconstructing! */
1552 /* XXX wrong for RAID6 */
1553 rf_unlock_mutex2(raidPtr->mutex);
1554 return (EINVAL);
1555 }
1556 if ((raidPtr->Disks[rr->col].status ==
1557 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1558 /* some other component has failed. Let's not make
1559 things worse. XXX wrong for RAID6 */
1560 rf_unlock_mutex2(raidPtr->mutex);
1561 return (EINVAL);
1562 }
1563 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1564 /* Can't fail a spared disk! */
1565 rf_unlock_mutex2(raidPtr->mutex);
1566 return (EINVAL);
1567 }
1568 rf_unlock_mutex2(raidPtr->mutex);
1569
1570 /* make a copy of the recon request so that we don't rely on
1571 * the user's buffer */
1572 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1573 if (rrcopy == NULL)
1574 return(ENOMEM);
1575 memcpy(rrcopy, rr, sizeof(*rr));
1576 rrcopy->raidPtr = (void *) raidPtr;
1577
1578 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1579 rf_ReconThread,
1580 rrcopy,"raid_recon");
1581 return (0);
1582
1583 /* invoke a copyback operation after recon on whatever disk
1584 * needs it, if any */
1585 case RAIDFRAME_COPYBACK:
1586
1587 if (raidPtr->Layout.map->faultsTolerated == 0) {
1588 /* This makes no sense on a RAID 0!! */
1589 return(EINVAL);
1590 }
1591
1592 if (raidPtr->copyback_in_progress == 1) {
1593 /* Copyback is already in progress! */
1594 return(EINVAL);
1595 }
1596
1597 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1598 rf_CopybackThread,
1599 raidPtr,"raid_copyback");
1600 return (retcode);
1601
1602 /* return the percentage completion of reconstruction */
1603 case RAIDFRAME_CHECK_RECON_STATUS:
1604 if (raidPtr->Layout.map->faultsTolerated == 0) {
1605 /* This makes no sense on a RAID 0, so tell the
1606 user it's done. */
1607 *(int *) data = 100;
1608 return(0);
1609 }
1610 if (raidPtr->status != rf_rs_reconstructing)
1611 *(int *) data = 100;
1612 else {
1613 if (raidPtr->reconControl->numRUsTotal > 0) {
1614 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1615 } else {
1616 *(int *) data = 0;
1617 }
1618 }
1619 return (0);
1620 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1621 progressInfoPtr = (RF_ProgressInfo_t **) data;
1622 if (raidPtr->status != rf_rs_reconstructing) {
1623 progressInfo.remaining = 0;
1624 progressInfo.completed = 100;
1625 progressInfo.total = 100;
1626 } else {
1627 progressInfo.total =
1628 raidPtr->reconControl->numRUsTotal;
1629 progressInfo.completed =
1630 raidPtr->reconControl->numRUsComplete;
1631 progressInfo.remaining = progressInfo.total -
1632 progressInfo.completed;
1633 }
1634 retcode = copyout(&progressInfo, *progressInfoPtr,
1635 sizeof(RF_ProgressInfo_t));
1636 return (retcode);
1637
1638 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1639 if (raidPtr->Layout.map->faultsTolerated == 0) {
1640 /* This makes no sense on a RAID 0, so tell the
1641 user it's done. */
1642 *(int *) data = 100;
1643 return(0);
1644 }
1645 if (raidPtr->parity_rewrite_in_progress == 1) {
1646 *(int *) data = 100 *
1647 raidPtr->parity_rewrite_stripes_done /
1648 raidPtr->Layout.numStripe;
1649 } else {
1650 *(int *) data = 100;
1651 }
1652 return (0);
1653
1654 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1655 progressInfoPtr = (RF_ProgressInfo_t **) data;
1656 if (raidPtr->parity_rewrite_in_progress == 1) {
1657 progressInfo.total = raidPtr->Layout.numStripe;
1658 progressInfo.completed =
1659 raidPtr->parity_rewrite_stripes_done;
1660 progressInfo.remaining = progressInfo.total -
1661 progressInfo.completed;
1662 } else {
1663 progressInfo.remaining = 0;
1664 progressInfo.completed = 100;
1665 progressInfo.total = 100;
1666 }
1667 retcode = copyout(&progressInfo, *progressInfoPtr,
1668 sizeof(RF_ProgressInfo_t));
1669 return (retcode);
1670
1671 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1672 if (raidPtr->Layout.map->faultsTolerated == 0) {
1673 /* This makes no sense on a RAID 0 */
1674 *(int *) data = 100;
1675 return(0);
1676 }
1677 if (raidPtr->copyback_in_progress == 1) {
1678 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1679 raidPtr->Layout.numStripe;
1680 } else {
1681 *(int *) data = 100;
1682 }
1683 return (0);
1684
1685 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1686 progressInfoPtr = (RF_ProgressInfo_t **) data;
1687 if (raidPtr->copyback_in_progress == 1) {
1688 progressInfo.total = raidPtr->Layout.numStripe;
1689 progressInfo.completed =
1690 raidPtr->copyback_stripes_done;
1691 progressInfo.remaining = progressInfo.total -
1692 progressInfo.completed;
1693 } else {
1694 progressInfo.remaining = 0;
1695 progressInfo.completed = 100;
1696 progressInfo.total = 100;
1697 }
1698 retcode = copyout(&progressInfo, *progressInfoPtr,
1699 sizeof(RF_ProgressInfo_t));
1700 return (retcode);
1701
1702 /* the sparetable daemon calls this to wait for the kernel to
1703 * need a spare table. this ioctl does not return until a
1704 * spare table is needed. XXX -- calling mpsleep here in the
1705 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1706 * -- I should either compute the spare table in the kernel,
1707 * or have a different -- XXX XXX -- interface (a different
1708 * character device) for delivering the table -- XXX */
1709 #if 0
1710 case RAIDFRAME_SPARET_WAIT:
1711 rf_lock_mutex2(rf_sparet_wait_mutex);
1712 while (!rf_sparet_wait_queue)
1713 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1714 waitreq = rf_sparet_wait_queue;
1715 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1716 rf_unlock_mutex2(rf_sparet_wait_mutex);
1717
1718 /* structure assignment */
1719 *((RF_SparetWait_t *) data) = *waitreq;
1720
1721 RF_Free(waitreq, sizeof(*waitreq));
1722 return (0);
1723
1724 /* wakes up a process waiting on SPARET_WAIT and puts an error
1725 * code in it that will cause the dameon to exit */
1726 case RAIDFRAME_ABORT_SPARET_WAIT:
1727 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1728 waitreq->fcol = -1;
1729 rf_lock_mutex2(rf_sparet_wait_mutex);
1730 waitreq->next = rf_sparet_wait_queue;
1731 rf_sparet_wait_queue = waitreq;
1732 rf_broadcast_conf2(rf_sparet_wait_cv);
1733 rf_unlock_mutex2(rf_sparet_wait_mutex);
1734 return (0);
1735
1736 /* used by the spare table daemon to deliver a spare table
1737 * into the kernel */
1738 case RAIDFRAME_SEND_SPARET:
1739
1740 /* install the spare table */
1741 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1742
1743 /* respond to the requestor. the return status of the spare
1744 * table installation is passed in the "fcol" field */
1745 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1746 waitreq->fcol = retcode;
1747 rf_lock_mutex2(rf_sparet_wait_mutex);
1748 waitreq->next = rf_sparet_resp_queue;
1749 rf_sparet_resp_queue = waitreq;
1750 rf_broadcast_cond2(rf_sparet_resp_cv);
1751 rf_unlock_mutex2(rf_sparet_wait_mutex);
1752
1753 return (retcode);
1754 #endif
1755
1756 default:
1757 break; /* fall through to the os-specific code below */
1758
1759 }
1760
1761 if (!raidPtr->valid)
1762 return (EINVAL);
1763
1764 /*
1765 * Add support for "regular" device ioctls here.
1766 */
1767
1768 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1769 if (error != EPASSTHROUGH)
1770 return (error);
1771
1772 switch (cmd) {
1773 case DIOCGDINFO:
1774 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1775 break;
1776 #ifdef __HAVE_OLD_DISKLABEL
1777 case ODIOCGDINFO:
1778 newlabel = *(rs->sc_dkdev.dk_label);
1779 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1780 return ENOTTY;
1781 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1782 break;
1783 #endif
1784
1785 case DIOCGPART:
1786 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1787 ((struct partinfo *) data)->part =
1788 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1789 break;
1790
1791 case DIOCWDINFO:
1792 case DIOCSDINFO:
1793 #ifdef __HAVE_OLD_DISKLABEL
1794 case ODIOCWDINFO:
1795 case ODIOCSDINFO:
1796 #endif
1797 {
1798 struct disklabel *lp;
1799 #ifdef __HAVE_OLD_DISKLABEL
1800 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1801 memset(&newlabel, 0, sizeof newlabel);
1802 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1803 lp = &newlabel;
1804 } else
1805 #endif
1806 lp = (struct disklabel *)data;
1807
1808 if ((error = raidlock(rs)) != 0)
1809 return (error);
1810
1811 rs->sc_flags |= RAIDF_LABELLING;
1812
1813 error = setdisklabel(rs->sc_dkdev.dk_label,
1814 lp, 0, rs->sc_dkdev.dk_cpulabel);
1815 if (error == 0) {
1816 if (cmd == DIOCWDINFO
1817 #ifdef __HAVE_OLD_DISKLABEL
1818 || cmd == ODIOCWDINFO
1819 #endif
1820 )
1821 error = writedisklabel(RAIDLABELDEV(dev),
1822 raidstrategy, rs->sc_dkdev.dk_label,
1823 rs->sc_dkdev.dk_cpulabel);
1824 }
1825 rs->sc_flags &= ~RAIDF_LABELLING;
1826
1827 raidunlock(rs);
1828
1829 if (error)
1830 return (error);
1831 break;
1832 }
1833
1834 case DIOCWLABEL:
1835 if (*(int *) data != 0)
1836 rs->sc_flags |= RAIDF_WLABEL;
1837 else
1838 rs->sc_flags &= ~RAIDF_WLABEL;
1839 break;
1840
1841 case DIOCGDEFLABEL:
1842 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1843 break;
1844
1845 #ifdef __HAVE_OLD_DISKLABEL
1846 case ODIOCGDEFLABEL:
1847 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1848 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1849 return ENOTTY;
1850 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1851 break;
1852 #endif
1853
1854 case DIOCAWEDGE:
1855 case DIOCDWEDGE:
1856 dkw = (void *)data;
1857
1858 /* If the ioctl happens here, the parent is us. */
1859 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1860 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1861
1862 case DIOCLWEDGES:
1863 return dkwedge_list(&rs->sc_dkdev,
1864 (struct dkwedge_list *)data, l);
1865 case DIOCCACHESYNC:
1866 return rf_sync_component_caches(raidPtr);
1867
1868 case DIOCGSTRATEGY:
1869 {
1870 struct disk_strategy *dks = (void *)data;
1871
1872 s = splbio();
1873 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1874 sizeof(dks->dks_name));
1875 splx(s);
1876 dks->dks_paramlen = 0;
1877
1878 return 0;
1879 }
1880
1881 case DIOCSSTRATEGY:
1882 {
1883 struct disk_strategy *dks = (void *)data;
1884 struct bufq_state *new;
1885 struct bufq_state *old;
1886
1887 if (dks->dks_param != NULL) {
1888 return EINVAL;
1889 }
1890 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1891 error = bufq_alloc(&new, dks->dks_name,
1892 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1893 if (error) {
1894 return error;
1895 }
1896 s = splbio();
1897 old = rs->buf_queue;
1898 bufq_move(new, old);
1899 rs->buf_queue = new;
1900 splx(s);
1901 bufq_free(old);
1902
1903 return 0;
1904 }
1905
1906 default:
1907 retcode = ENOTTY;
1908 }
1909 return (retcode);
1910
1911 }
1912
1913
1914 /* raidinit -- complete the rest of the initialization for the
1915 RAIDframe device. */
1916
1917
static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;
	struct raid_softc *rs;
	int unit;

	/* The softc array is indexed by RAID unit number. */
	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/*
	 * Mark the unit as configured up front; this is undone below if
	 * the pseudo-device attach fails.  XXX should check return code
	 * first...
	 */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/*
	 * Attach the pseudo device: build a cfdata record by hand and
	 * hand it to autoconf.  The cfdata is freed again on failure
	 * (and by the RAIDFRAME_SHUTDOWN ioctl path on detach).
	 */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* Attach failed: roll back the INITED flag and the cfdata. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	/* Advertised size of the unit, in sectors. */
	rs->sc_size = raidPtr->totalSectors;

	/* Probe for wedges (GPT etc.) now that the disk exists. */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
1970 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
1980 int
1981 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1982 {
1983 int retcode;
1984
1985 rf_lock_mutex2(rf_sparet_wait_mutex);
1986 req->next = rf_sparet_wait_queue;
1987 rf_sparet_wait_queue = req;
1988 rf_broadcast_cond2(rf_sparet_wait_cv);
1989
1990 /* mpsleep unlocks the mutex */
1991 while (!rf_sparet_resp_queue) {
1992 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1993 }
1994 req = rf_sparet_resp_queue;
1995 rf_sparet_resp_queue = req->next;
1996 rf_unlock_mutex2(rf_sparet_wait_mutex);
1997
1998 retcode = req->fcol;
1999 RF_Free(req, sizeof(*req)); /* this is not the same req as we
2000 * alloc'd */
2001 return (retcode);
2002 }
2003 #endif
2004
2005 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2006 * bp & passes it down.
2007 * any calls originating in the kernel must use non-blocking I/O
2008 * do some extra sanity checking to return "appropriate" error values for
2009 * certain conditions (to make some standard utilities work)
2010 *
2011 * Formerly known as: rf_DoAccessKernel
2012 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update, then re-acquire
		 * it before touching numNewFailures again. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/*
	 * Drain the buffer queue while we have openings (the per-unit
	 * limit on simultaneous outstanding I/Os).  The mutex is held on
	 * entry to each iteration and released while we work on a buf.
	 */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* Convert from DEV_BSIZE units to RAID sectors. */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* Non-raw partition: add the partition offset. */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/*
		 * Range check: pb accounts for a partial trailing sector;
		 * the (sum < ...) comparisons catch arithmetic wrap-around.
		 */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			/* Request extends past the end of the set. */
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that aren't a multiple of the sector size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening under the mutex. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* Start the disk-busy accounting for this transfer. */
		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess failed outright; complete the buf
			 * with the error and keep draining the queue. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2133
2134
2135
2136
2137 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2138
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	/* Map the RAIDframe request type onto a buf op flag. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NB: unconditional printf (the extra parens are redundant
		 * grouping, not a macro call). */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately via the normal I/O
		 * completion callback. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in the buf for the component device; completion
		 * goes through KernelWakeupFunc with req as b_private. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* Always reports success; errors surface via the completion
	 * callback on the buf. */
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs at biodone() time: records any error (possibly
   marking the component failed), moves the completed request onto the
   RAID set's iodone queue, and wakes the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the originating request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* account the time this I/O spent at the physical device */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2280
2281
2282 /*
2283 * initialize a buf structure for doing an I/O in the kernel.
2284 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	/* transfer length in bytes = sector count * sector size */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert the sector address into a DEV_BSIZE block number */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* a zero-length transfer indicates a bug in the caller */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	/* cbFunc (KernelWakeupFunc) is invoked at biodone() time with
	 * cbArg available via b_private */
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2309
2310 static void
2311 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2312 struct disklabel *lp)
2313 {
2314 memset(lp, 0, sizeof(*lp));
2315
2316 /* fabricate a label... */
2317 lp->d_secperunit = raidPtr->totalSectors;
2318 lp->d_secsize = raidPtr->bytesPerSector;
2319 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2320 lp->d_ntracks = 4 * raidPtr->numCol;
2321 lp->d_ncylinders = raidPtr->totalSectors /
2322 (lp->d_nsectors * lp->d_ntracks);
2323 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2324
2325 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2326 lp->d_type = DTYPE_RAID;
2327 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2328 lp->d_rpm = 3600;
2329 lp->d_interleave = 1;
2330 lp->d_flags = 0;
2331
2332 lp->d_partitions[RAW_PART].p_offset = 0;
2333 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2334 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2335 lp->d_npartitions = RAW_PART + 1;
2336
2337 lp->d_magic = DISKMAGIC;
2338 lp->d_magic2 = DISKMAGIC;
2339 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2340
2341 }
2342 /*
2343 * Read the disklabel from the raid device. If one is not present, fake one
2344 * up.
2345 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start with a fabricated label so readdisklabel() has sane
	 * defaults to fall back on */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label could be read; synthesize one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* warn about (but keep) partitions that run past the set */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2398 /*
2399 * Take care of things one might want to take care of in the event
2400 * that a disklabel isn't present.
2401 */
2402 static void
2403 raidmakedisklabel(struct raid_softc *rs)
2404 {
2405 struct disklabel *lp = rs->sc_dkdev.dk_label;
2406 db1_printf(("Making a label..\n"));
2407
2408 /*
2409 * For historical reasons, if there's no disklabel present
2410 * the raw partition must be marked FS_BSDFFS.
2411 */
2412
2413 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2414
2415 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2416
2417 lp->d_checksum = dkcksum(lp);
2418 }
2419 /*
2420 * Wait interruptibly for an exclusive lock.
2421 *
2422 * XXX
2423 * Several drivers do this; it should be abstracted and made MP-safe.
2424 * (Hmm... where have we seen this warning before :-> GO )
2425 */
2426 static int
2427 raidlock(struct raid_softc *rs)
2428 {
2429 int error;
2430
2431 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2432 rs->sc_flags |= RAIDF_WANTED;
2433 if ((error =
2434 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2435 return (error);
2436 }
2437 rs->sc_flags |= RAIDF_LOCKED;
2438 return (0);
2439 }
2440 /*
2441 * Unlock and wake up any waiters.
2442 */
2443 static void
2444 raidunlock(struct raid_softc *rs)
2445 {
2446
2447 rs->sc_flags &= ~RAIDF_LOCKED;
2448 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2449 rs->sc_flags &= ~RAIDF_WANTED;
2450 wakeup(rs);
2451 }
2452 }
2453
2454
/*
 * On-disk layout of the per-component metadata area: the component
 * label lives RF_COMPONENT_INFO_OFFSET bytes into the component, and
 * the parity map follows the (possibly sector-rounded) label area.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
#define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE

/* Byte offset of the component label area within each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
2465
2466 static daddr_t
2467 rf_component_info_size(unsigned secsize)
2468 {
2469 daddr_t info_size;
2470
2471 KASSERT(secsize);
2472 if (secsize > RF_COMPONENT_INFO_SIZE)
2473 info_size = secsize;
2474 else
2475 info_size = RF_COMPONENT_INFO_SIZE;
2476
2477 return info_size;
2478 }
2479
2480 static daddr_t
2481 rf_parity_map_offset(RF_Raid_t *raidPtr)
2482 {
2483 daddr_t map_offset;
2484
2485 KASSERT(raidPtr->bytesPerSector);
2486 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2487 map_offset = raidPtr->bytesPerSector;
2488 else
2489 map_offset = RF_COMPONENT_INFO_SIZE;
2490 map_offset += rf_component_info_offset();
2491
2492 return map_offset;
2493 }
2494
2495 static daddr_t
2496 rf_parity_map_size(RF_Raid_t *raidPtr)
2497 {
2498 daddr_t map_size;
2499
2500 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2501 map_size = raidPtr->bytesPerSector;
2502 else
2503 map_size = RF_PARITY_MAP_SIZE;
2504
2505 return map_size;
2506 }
2507
2508 int
2509 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2510 {
2511 RF_ComponentLabel_t *clabel;
2512
2513 clabel = raidget_component_label(raidPtr, col);
2514 clabel->clean = RF_RAID_CLEAN;
2515 raidflush_component_label(raidPtr, col);
2516 return(0);
2517 }
2518
2519
2520 int
2521 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2522 {
2523 RF_ComponentLabel_t *clabel;
2524
2525 clabel = raidget_component_label(raidPtr, col);
2526 clabel->clean = RF_RAID_DIRTY;
2527 raidflush_component_label(raidPtr, col);
2528 return(0);
2529 }
2530
/*
 * Read column `col's component label from disk into the in-core copy
 * kept in raid_cinfo[col].ci_label.  Returns the read status.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2540
/* Return a pointer to column `col's in-core component label. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2546
2547 int
2548 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2549 {
2550 RF_ComponentLabel_t *label;
2551
2552 label = &raidPtr->raid_cinfo[col].ci_label;
2553 label->mod_counter = raidPtr->mod_counter;
2554 #ifndef RF_NO_PARITY_MAP
2555 label->parity_map_modcount = label->mod_counter;
2556 #endif
2557 return raidwrite_component_label(raidPtr->bytesPerSector,
2558 raidPtr->Disks[col].dev,
2559 raidPtr->raid_cinfo[col].ci_vp, label);
2560 }
2561
2562
/*
 * Read one component label from disk; thin wrapper around
 * raidread_component_area() using the standard label offset and the
 * sector-rounded label size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2572
2573 /* ARGSUSED */
2574 static int
2575 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2576 size_t msize, daddr_t offset, daddr_t dsize)
2577 {
2578 struct buf *bp;
2579 const struct bdevsw *bdev;
2580 int error;
2581
2582 /* XXX should probably ensure that we don't try to do this if
2583 someone has changed rf_protected_sectors. */
2584
2585 if (b_vp == NULL) {
2586 /* For whatever reason, this component is not valid.
2587 Don't try to read a component label from it. */
2588 return(EINVAL);
2589 }
2590
2591 /* get a block of the appropriate size... */
2592 bp = geteblk((int)dsize);
2593 bp->b_dev = dev;
2594
2595 /* get our ducks in a row for the read */
2596 bp->b_blkno = offset / DEV_BSIZE;
2597 bp->b_bcount = dsize;
2598 bp->b_flags |= B_READ;
2599 bp->b_resid = dsize;
2600
2601 bdev = bdevsw_lookup(bp->b_dev);
2602 if (bdev == NULL)
2603 return (ENXIO);
2604 (*bdev->d_strategy)(bp);
2605
2606 error = biowait(bp);
2607
2608 if (!error) {
2609 memcpy(data, bp->b_data, msize);
2610 }
2611
2612 brelse(bp, 0);
2613 return(error);
2614 }
2615
2616
/*
 * Write one component label to disk; thin wrapper around
 * raidwrite_component_area() using the standard label offset and the
 * sector-rounded label size.  The write is synchronous (asyncp == 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2626
2627 /* ARGSUSED */
2628 static int
2629 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2630 size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2631 {
2632 struct buf *bp;
2633 const struct bdevsw *bdev;
2634 int error;
2635
2636 /* get a block of the appropriate size... */
2637 bp = geteblk((int)dsize);
2638 bp->b_dev = dev;
2639
2640 /* get our ducks in a row for the write */
2641 bp->b_blkno = offset / DEV_BSIZE;
2642 bp->b_bcount = dsize;
2643 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2644 bp->b_resid = dsize;
2645
2646 memset(bp->b_data, 0, dsize);
2647 memcpy(bp->b_data, data, msize);
2648
2649 bdev = bdevsw_lookup(bp->b_dev);
2650 if (bdev == NULL)
2651 return (ENXIO);
2652 (*bdev->d_strategy)(bp);
2653 if (asyncp)
2654 return 0;
2655 error = biowait(bp);
2656 brelse(bp, 0);
2657 if (error) {
2658 #if 1
2659 printf("Failed to write RAID component info!\n");
2660 #endif
2661 }
2662
2663 return(error);
2664 }
2665
/*
 * Write the on-disk parity map image *map to the parity map area of
 * every non-dead component.  Write errors are currently ignored (see
 * the XXXjld note below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2683
/*
 * Read the parity map area from every non-dead component and combine
 * the copies into *map via rf_paritymap_merge(); the first live copy
 * seeds the result.  Read errors are silently ignored here.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* first live copy seeds the merged result */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2708
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component dirty on disk; in-use spares get their
 * labels (re)initialized and marked dirty as well.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			/* NOTE(review): if no column points at this spare,
			   scol keeps its previous value (-1 initially) --
			   confirm this is intended. */
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2768
2769
/*
 * Bump the mod counter and rewrite the component label of every
 * optimal component and every in-use spare.  When final is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			/* NOTE(review): if no column maps to this spare,
			   scol keeps its previous value (-1 initially) --
			   confirm this is intended. */
			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2844
2845 void
2846 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2847 {
2848
2849 if (vp != NULL) {
2850 if (auto_configured == 1) {
2851 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2852 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2853 vput(vp);
2854
2855 } else {
2856 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2857 }
2858 }
2859 }
2860
2861
2862 void
2863 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2864 {
2865 int r,c;
2866 struct vnode *vp;
2867 int acd;
2868
2869
2870 /* We take this opportunity to close the vnodes like we should.. */
2871
2872 for (c = 0; c < raidPtr->numCol; c++) {
2873 vp = raidPtr->raid_cinfo[c].ci_vp;
2874 acd = raidPtr->Disks[c].auto_configured;
2875 rf_close_component(raidPtr, vp, acd);
2876 raidPtr->raid_cinfo[c].ci_vp = NULL;
2877 raidPtr->Disks[c].auto_configured = 0;
2878 }
2879
2880 for (r = 0; r < raidPtr->numSpare; r++) {
2881 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2882 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2883 rf_close_component(raidPtr, vp, acd);
2884 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2885 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2886 }
2887 }
2888
2889
/*
 * Kernel thread body wrapping rf_FailDisk(): fails the requested
 * column and, if RF_FDFLAGS_RECON is set, starts reconstruction to a
 * spare.  Frees the request and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our creator; we own and free it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2911
/*
 * Kernel thread body wrapping rf_RewriteParity().  On success the
 * in-core parity state is marked clean; on failure the error is
 * logged and the set stays dirty.  Wakes any shutdown waiter blocked
 * on parity_rewrite_in_progress, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit! If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop? If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2942
2943
/*
 * Kernel thread body wrapping rf_CopybackReconstructedData() at
 * splbio; maintains the copyback_in_progress flag around the call and
 * exits when the copyback finishes.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2958
2959
/*
 * Kernel thread body wrapping rf_ReconstructInPlace() for the
 * requested column; maintains recon_in_progress, frees the request,
 * and exits.  Note: the return value of rf_ReconstructInPlace() is
 * discarded here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was allocated by our creator; we own and free it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2977
/*
 * Read the component label from (dev, vp).  If it looks reasonable
 * (and the label's claimed partition size fits within `size'),
 * allocate an RF_AutoConfig_t and prepend it to ac_list; otherwise
 * close and release the vnode.  If memory runs out, the entire
 * ac_list is freed and NULL is returned.
 * NOTE(review): on the out-of-memory path the current vnode is not
 * closed/released here -- confirm whether the caller handles that.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* tear down everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3035
/*
 * Scan every disk-class device in the system for RAIDframe
 * components: dk(4) wedges of type DKW_PTYPE_RAIDFRAME, disklabel
 * partitions of type FS_RAID, and, if neither is found on a disk, its
 * raw partition.  Each candidate is appended to the returned
 * RF_AutoConfig_t list via rf_get_component().
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares." Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* dk(4) wedge: accept it only if its partition
			   type says RAIDframe */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists. Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more. We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
3217
3218
3219 int
3220 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3221 {
3222
3223 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3224 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3225 ((clabel->clean == RF_RAID_CLEAN) ||
3226 (clabel->clean == RF_RAID_DIRTY)) &&
3227 clabel->row >=0 &&
3228 clabel->column >= 0 &&
3229 clabel->num_rows > 0 &&
3230 clabel->num_columns > 0 &&
3231 clabel->row < clabel->num_rows &&
3232 clabel->column < clabel->num_columns &&
3233 clabel->blockSize > 0 &&
3234 /*
3235 * numBlocksHi may contain garbage, but it is ok since
3236 * the type is unsigned. If it is really garbage,
3237 * rf_fix_old_label_size() will fix it.
3238 */
3239 rf_component_label_numblocks(clabel) > 0) {
3240 /*
3241 * label looks reasonable enough...
3242 * let's make sure it has no old garbage.
3243 */
3244 if (numsecs)
3245 rf_fix_old_label_size(clabel, numsecs);
3246 return(1);
3247 }
3248 return(0);
3249 }
3250
3251
3252 /*
3253 * For reasons yet unknown, some old component labels have garbage in
3254 * the newer numBlocksHi region, and this causes lossage. Since those
3255 * disks will also have numsecs set to less than 32 bits of sectors,
3256 * we can determine when this corruption has occured, and fix it.
3257 *
3258 * The exact same problem, with the same unknown reason, happens to
3259 * the partitionSizeHi member as well.
3260 */
3261 static void
3262 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3263 {
3264
3265 if (numsecs < ((uint64_t)1 << 32)) {
3266 if (clabel->numBlocksHi) {
3267 printf("WARNING: total sectors < 32 bits, yet "
3268 "numBlocksHi set\n"
3269 "WARNING: resetting numBlocksHi to zero.\n");
3270 clabel->numBlocksHi = 0;
3271 }
3272
3273 if (clabel->partitionSizeHi) {
3274 printf("WARNING: total sectors < 32 bits, yet "
3275 "partitionSizeHi set\n"
3276 "WARNING: resetting partitionSizeHi to zero.\n");
3277 clabel->partitionSizeHi = 0;
3278 }
3279 }
3280 }
3281
3282
3283 #ifdef DEBUG
/* Pretty-print the contents of a component label (DEBUG kernels only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No");
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
3312 #endif
3313
3314 RF_ConfigSet_t *
3315 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3316 {
3317 RF_AutoConfig_t *ac;
3318 RF_ConfigSet_t *config_sets;
3319 RF_ConfigSet_t *cset;
3320 RF_AutoConfig_t *ac_next;
3321
3322
3323 config_sets = NULL;
3324
3325 /* Go through the AutoConfig list, and figure out which components
3326 belong to what sets. */
3327 ac = ac_list;
3328 while(ac!=NULL) {
3329 /* we're going to putz with ac->next, so save it here
3330 for use at the end of the loop */
3331 ac_next = ac->next;
3332
3333 if (config_sets == NULL) {
3334 /* will need at least this one... */
3335 config_sets = (RF_ConfigSet_t *)
3336 malloc(sizeof(RF_ConfigSet_t),
3337 M_RAIDFRAME, M_NOWAIT);
3338 if (config_sets == NULL) {
3339 panic("rf_create_auto_sets: No memory!");
3340 }
3341 /* this one is easy :) */
3342 config_sets->ac = ac;
3343 config_sets->next = NULL;
3344 config_sets->rootable = 0;
3345 ac->next = NULL;
3346 } else {
3347 /* which set does this component fit into? */
3348 cset = config_sets;
3349 while(cset!=NULL) {
3350 if (rf_does_it_fit(cset, ac)) {
3351 /* looks like it matches... */
3352 ac->next = cset->ac;
3353 cset->ac = ac;
3354 break;
3355 }
3356 cset = cset->next;
3357 }
3358 if (cset==NULL) {
3359 /* didn't find a match above... new set..*/
3360 cset = (RF_ConfigSet_t *)
3361 malloc(sizeof(RF_ConfigSet_t),
3362 M_RAIDFRAME, M_NOWAIT);
3363 if (cset == NULL) {
3364 panic("rf_create_auto_sets: No memory!");
3365 }
3366 cset->ac = ac;
3367 ac->next = NULL;
3368 cset->next = config_sets;
3369 cset->rootable = 0;
3370 config_sets = cset;
3371 }
3372 }
3373 ac = ac_next;
3374 }
3375
3376
3377 return(config_sets);
3378 }
3379
3380 static int
3381 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3382 {
3383 RF_ComponentLabel_t *clabel1, *clabel2;
3384
3385 /* If this one matches the *first* one in the set, that's good
3386 enough, since the other members of the set would have been
3387 through here too... */
3388 /* note that we are not checking partitionSize here..
3389
3390 Note that we are also not checking the mod_counters here.
3391 If everything else matches execpt the mod_counter, that's
3392 good enough for this test. We will deal with the mod_counters
3393 a little later in the autoconfiguration process.
3394
3395 (clabel1->mod_counter == clabel2->mod_counter) &&
3396
3397 The reason we don't check for this is that failed disks
3398 will have lower modification counts. If those disks are
3399 not added to the set they used to belong to, then they will
3400 form their own set, which may result in 2 different sets,
3401 for example, competing to be configured at raid0, and
3402 perhaps competing to be the root filesystem set. If the
3403 wrong ones get configured, or both attempt to become /,
3404 weird behaviour and or serious lossage will occur. Thus we
3405 need to bring them into the fold here, and kick them out at
3406 a later point.
3407
3408 */
3409
3410 clabel1 = cset->ac->clabel;
3411 clabel2 = ac->clabel;
3412 if ((clabel1->version == clabel2->version) &&
3413 (clabel1->serial_number == clabel2->serial_number) &&
3414 (clabel1->num_rows == clabel2->num_rows) &&
3415 (clabel1->num_columns == clabel2->num_columns) &&
3416 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3417 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3418 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3419 (clabel1->parityConfig == clabel2->parityConfig) &&
3420 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3421 (clabel1->blockSize == clabel2->blockSize) &&
3422 rf_component_label_numblocks(clabel1) ==
3423 rf_component_label_numblocks(clabel2) &&
3424 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3425 (clabel1->root_partition == clabel2->root_partition) &&
3426 (clabel1->last_unit == clabel2->last_unit) &&
3427 (clabel1->config_order == clabel2->config_order)) {
3428 /* if it get's here, it almost *has* to be a match */
3429 } else {
3430 /* it's not consistent with somebody in the set..
3431 punt */
3432 return(0);
3433 }
3434 /* all was fine.. it must fit... */
3435 return(1);
3436 }
3437
/*
 * Decide whether configuration set 'cset' has enough live components
 * to be configured.  A component is "live" only if its mod_counter
 * matches the newest mod_counter found in the set.  Returns 1 if the
 * set can be configured, 0 if too many members are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   The highest mod_counter present wins; components carrying an
	   older one were failed at some point and don't count as live. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			/* first component seeds the maximum */
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a live component claiming column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd component of a mirror
			   pair without bailing out above.. reset the
			   even_pair_failed flag, and go on to the next
			   pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerated failures by level: RAID 0 none, RAID 4/5 one.
	   RAID 1 was already fully handled pair-wise above. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3540
3541 void
3542 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3543 RF_Raid_t *raidPtr)
3544 {
3545 RF_ComponentLabel_t *clabel;
3546 int i;
3547
3548 clabel = ac->clabel;
3549
3550 /* 1. Fill in the common stuff */
3551 config->numRow = clabel->num_rows = 1;
3552 config->numCol = clabel->num_columns;
3553 config->numSpare = 0; /* XXX should this be set here? */
3554 config->sectPerSU = clabel->sectPerSU;
3555 config->SUsPerPU = clabel->SUsPerPU;
3556 config->SUsPerRU = clabel->SUsPerRU;
3557 config->parityConfig = clabel->parityConfig;
3558 /* XXX... */
3559 strcpy(config->diskQueueType,"fifo");
3560 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3561 config->layoutSpecificSize = 0; /* XXX ?? */
3562
3563 while(ac!=NULL) {
3564 /* row/col values will be in range due to the checks
3565 in reasonable_label() */
3566 strcpy(config->devnames[0][ac->clabel->column],
3567 ac->devname);
3568 ac = ac->next;
3569 }
3570
3571 for(i=0;i<RF_MAXDBGV;i++) {
3572 config->debugVars[i][0] = 0;
3573 }
3574 }
3575
3576 int
3577 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3578 {
3579 RF_ComponentLabel_t *clabel;
3580 int column;
3581 int sparecol;
3582
3583 raidPtr->autoconfigure = new_value;
3584
3585 for(column=0; column<raidPtr->numCol; column++) {
3586 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3587 clabel = raidget_component_label(raidPtr, column);
3588 clabel->autoconfigure = new_value;
3589 raidflush_component_label(raidPtr, column);
3590 }
3591 }
3592 for(column = 0; column < raidPtr->numSpare ; column++) {
3593 sparecol = raidPtr->numCol + column;
3594 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3595 clabel = raidget_component_label(raidPtr, sparecol);
3596 clabel->autoconfigure = new_value;
3597 raidflush_component_label(raidPtr, sparecol);
3598 }
3599 }
3600 return(new_value);
3601 }
3602
3603 int
3604 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3605 {
3606 RF_ComponentLabel_t *clabel;
3607 int column;
3608 int sparecol;
3609
3610 raidPtr->root_partition = new_value;
3611 for(column=0; column<raidPtr->numCol; column++) {
3612 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3613 clabel = raidget_component_label(raidPtr, column);
3614 clabel->root_partition = new_value;
3615 raidflush_component_label(raidPtr, column);
3616 }
3617 }
3618 for(column = 0; column < raidPtr->numSpare ; column++) {
3619 sparecol = raidPtr->numCol + column;
3620 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3621 clabel = raidget_component_label(raidPtr, sparecol);
3622 clabel->root_partition = new_value;
3623 raidflush_component_label(raidPtr, sparecol);
3624 }
3625 }
3626 return(new_value);
3627 }
3628
3629 void
3630 rf_release_all_vps(RF_ConfigSet_t *cset)
3631 {
3632 RF_AutoConfig_t *ac;
3633
3634 ac = cset->ac;
3635 while(ac!=NULL) {
3636 /* Close the vp, and give it back */
3637 if (ac->vp) {
3638 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3639 VOP_CLOSE(ac->vp, FREAD, NOCRED);
3640 vput(ac->vp);
3641 ac->vp = NULL;
3642 }
3643 ac = ac->next;
3644 }
3645 }
3646
3647
3648 void
3649 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3650 {
3651 RF_AutoConfig_t *ac;
3652 RF_AutoConfig_t *next_ac;
3653
3654 ac = cset->ac;
3655 while(ac!=NULL) {
3656 next_ac = ac->next;
3657 /* nuke the label */
3658 free(ac->clabel, M_RAIDFRAME);
3659 /* cleanup the config structure */
3660 free(ac, M_RAIDFRAME);
3661 /* "next.." */
3662 ac = next_ac;
3663 }
3664 /* and, finally, nuke the config set */
3665 free(cset, M_RAIDFRAME);
3666 }
3667
3668
/*
 * Initialize a component label from the current state of the RAID set:
 * geometry, identity, and policy flags all come from raidPtr.  The
 * label is marked dirty and optimal; row/column are NOT filled in here
 * (callers set those per-component).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout geometry */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the label's lo/hi fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* let the parity map layer record its own state in the label */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3701
/*
 * Autoconfigure one configuration set: pick a raid unit number
 * (preferring the unit recorded in the component labels), build an
 * RF_Config_t from the labels, and configure the array.  On success
 * *unit holds the chosen raid unit and 0 is returned; on failure a
 * non-zero value is returned and *unit is -1 (or the attempted unit).
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	/* raidID only goes negative when the search above exhausted
	   every unit without finding a free one */
	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine. Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3802
3803 void
3804 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3805 {
3806 struct buf *bp;
3807
3808 bp = (struct buf *)desc->bp;
3809 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3810 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3811 }
3812
/*
 * Initialize a RAIDframe resource pool with a floor of xmin items and
 * a ceiling of xmax items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	/* IPL_BIO: these pools are used from the block-I/O path */
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* pre-allocate xmin items and keep at least that many around
	   (NOTE(review): pool_prime's return value is ignored here) */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3822
3823 /*
3824 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3825 * if there is IO pending and if that IO could possibly be done for a
3826 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3827 * otherwise.
3828 *
3829 */
3830
3831 int
3832 rf_buf_queue_check(int raidid)
3833 {
3834 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3835 raidPtrs[raidid]->openings > 0) {
3836 /* there is work to do */
3837 return 0;
3838 }
3839 /* default is nothing to do */
3840 return 1;
3841 }
3842
3843 int
3844 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3845 {
3846 uint64_t numsecs;
3847 unsigned secsize;
3848 int error;
3849
3850 error = getdisksize(vp, &numsecs, &secsize);
3851 if (error == 0) {
3852 diskPtr->blockSize = secsize;
3853 diskPtr->numBlocks = numsecs - rf_protectedSectors;
3854 diskPtr->partitionSize = numsecs;
3855 return 0;
3856 }
3857 return error;
3858 }
3859
/*
 * Autoconf match routine: raid(4) pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3865
/*
 * Autoconf attach routine: intentionally a no-op; the real per-unit
 * setup happens when a RAID set is actually configured.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3871
3872
3873 static int
3874 raid_detach(device_t self, int flags)
3875 {
3876 int error;
3877 struct raid_softc *rs = &raid_softc[device_unit(self)];
3878
3879 if ((error = raidlock(rs)) != 0)
3880 return (error);
3881
3882 error = raid_detach_unlocked(rs);
3883
3884 raidunlock(rs);
3885
3886 return error;
3887 }
3888
/*
 * Publish a synthetic disk geometry for the raid unit via proplib, and
 * attach it to both the device properties and the disk(9) structure.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	/* fabricated geometry: one "track" per data stripe... */
	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	/* ...and an arbitrary 4 * numCol tracks per cylinder */
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	/* disk_info takes its own reference on geom; drop ours */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
	    "disk-info", disk_info);
	/* swap in the new dictionary, releasing any previous one */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3918
3919 /*
3920 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3921 * We end up returning whatever error was returned by the first cache flush
3922 * that fails.
3923 */
3924
3925 int
3926 rf_sync_component_caches(RF_Raid_t *raidPtr)
3927 {
3928 int c, sparecol;
3929 int e,error;
3930 int force = 1;
3931
3932 error = 0;
3933 for (c = 0; c < raidPtr->numCol; c++) {
3934 if (raidPtr->Disks[c].status == rf_ds_optimal) {
3935 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3936 &force, FWRITE, NOCRED);
3937 if (e) {
3938 if (e != ENODEV)
3939 printf("raid%d: cache flush to component %s failed.\n",
3940 raidPtr->raidid, raidPtr->Disks[c].devname);
3941 if (error == 0) {
3942 error = e;
3943 }
3944 }
3945 }
3946 }
3947
3948 for( c = 0; c < raidPtr->numSpare ; c++) {
3949 sparecol = raidPtr->numCol + c;
3950 /* Need to ensure that the reconstruct actually completed! */
3951 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3952 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3953 DIOCCACHESYNC, &force, FWRITE, NOCRED);
3954 if (e) {
3955 if (e != ENODEV)
3956 printf("raid%d: cache flush to component %s failed.\n",
3957 raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3958 if (error == 0) {
3959 error = e;
3960 }
3961 }
3962 }
3963 }
3964 return error;
3965 }
3966