rf_netbsdkintf.c revision 1.222 1 /* $NetBSD: rf_netbsdkintf.c,v 1.222 2006/11/16 01:33:23 christos Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.222 2006/11/16 01:33:23 christos Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
191 #ifdef DEBUG
192 int rf_kdebug_level = 0;
193 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
194 #else /* DEBUG */
195 #define db1_printf(a) { }
196 #endif /* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
244 struct raid_softc {
245 struct device *sc_dev;
246 int sc_flags; /* flags */
247 int sc_cflags; /* configuration flags */
248 uint64_t sc_size; /* size of the raid device */
249 char sc_xname[20]; /* XXX external name */
250 struct disk sc_dkdev; /* generic disk device info */
251 struct bufq_state *buf_queue; /* used for the device queue */
252 };
253 /* sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units requested in the kernel configuration.
 *
 * Allocates the global raidPtrs[] and raid_softc[] arrays, boots the
 * RAIDframe core, attaches the autoconf glue, and registers a
 * finalizer so auto-configurable sets are assembled after all real
 * hardware has been found.  On partial allocation failure it trims
 * numraid down to the number of fully-initialized units.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	/* No spare-table requests or responses outstanding yet. */
	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
		malloc(num * sizeof(struct raid_softc),
		       M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		/* FCFS queueing; per-unit I/O requests land here. */
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Only the first raidID units are usable. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		printf("config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set andconfigure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int num_root;
455
456 rootID = 0;
457 num_root = 0;
458 cset = config_sets;
459 while(cset != NULL ) {
460 next_cset = cset->next;
461 if (rf_have_enough_components(cset) &&
462 cset->ac->clabel->autoconfigure==1) {
463 retcode = rf_auto_config_set(cset,&raidID);
464 if (!retcode) {
465 #ifdef DEBUG
466 printf("raid%d: configured ok\n", raidID);
467 #endif
468 if (cset->rootable) {
469 rootID = raidID;
470 num_root++;
471 }
472 } else {
473 /* The autoconfig didn't work :( */
474 #if DEBUG
475 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
476 #endif
477 rf_release_all_vps(cset);
478 }
479 } else {
480 #ifdef DEBUG
481 printf("raid%d: not enough components\n", raidID);
482 #endif
483 /* we're not autoconfiguring this set...
484 release the associated resources */
485 rf_release_all_vps(cset);
486 }
487 /* cleanup */
488 rf_cleanup_config_set(cset);
489 cset = next_cset;
490 }
491
492 /* we found something bootable... */
493
494 if (num_root == 1) {
495 booted_device = raid_softc[rootID].sc_dev;
496 } else if (num_root > 1) {
497 /* we can't guess.. require the user to answer... */
498 boothowto |= RB_ASKNAME;
499 }
500 }
501
502
503 int
504 raidsize(dev_t dev)
505 {
506 struct raid_softc *rs;
507 struct disklabel *lp;
508 int part, unit, omask, size;
509
510 unit = raidunit(dev);
511 if (unit >= numraid)
512 return (-1);
513 rs = &raid_softc[unit];
514
515 if ((rs->sc_flags & RAIDF_INITED) == 0)
516 return (-1);
517
518 part = DISKPART(dev);
519 omask = rs->sc_dkdev.dk_openmask & (1 << part);
520 lp = rs->sc_dkdev.dk_label;
521
522 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
523 return (-1);
524
525 if (lp->d_partitions[part].p_fstype != FS_SWAP)
526 size = -1;
527 else
528 size = lp->d_partitions[part].p_size *
529 (lp->d_secsize / DEV_BSIZE);
530
531 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
532 return (-1);
533
534 return (size);
535
536 }
537
/*
 * raiddump: crash-dump entry point for the raid block device.
 * Dumping to a RAID set is not supported; always fails with ENXIO.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va,
	 size_t size)
{
	/* Not implemented. */
	return ENXIO;
}
545 /* ARGSUSED */
/*
 * raidopen: open entry point for the raid block/character devices.
 *
 * Takes the per-unit lock, validates the requested partition, records
 * the open in the char/block open masks, and -- on the first open of an
 * initialized unit -- marks all components dirty so that an unclean
 * shutdown is detectable.  Both the success path and the error path
 * fall through the `bad' label to drop the unit lock.
 */
int
raidopen(dev_t dev, int flags, int fmt,
	 struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize opens against configure/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}

	/* Combined open mask, derived from the per-format masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	/* Shared exit for success and failure: release the unit lock. */
	raidunlock(rs);

	return (error);


}
625 /* ARGSUSED */
/*
 * raidclose: close entry point for the raid block/character devices.
 *
 * Clears this partition from the appropriate open mask.  When the last
 * partition of an initialized unit is closed, the component labels get
 * a final update, and -- if the system is shutting down -- the RAID set
 * itself is shut down and the pseudo-device detached.
 *
 * NOTE(review): once the unit lock is held this always returns 0; the
 * errors collected from rf_Shutdown()/config_detach() into `error' are
 * never propagated to the caller -- confirm this is intentional.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize closes against configure/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
689
690 void
691 raidstrategy(struct buf *bp)
692 {
693 int s;
694
695 unsigned int raidID = raidunit(bp->b_dev);
696 RF_Raid_t *raidPtr;
697 struct raid_softc *rs = &raid_softc[raidID];
698 int wlabel;
699
700 if ((rs->sc_flags & RAIDF_INITED) ==0) {
701 bp->b_error = ENXIO;
702 bp->b_flags |= B_ERROR;
703 goto done;
704 }
705 if (raidID >= numraid || !raidPtrs[raidID]) {
706 bp->b_error = ENODEV;
707 bp->b_flags |= B_ERROR;
708 goto done;
709 }
710 raidPtr = raidPtrs[raidID];
711 if (!raidPtr->valid) {
712 bp->b_error = ENODEV;
713 bp->b_flags |= B_ERROR;
714 goto done;
715 }
716 if (bp->b_bcount == 0) {
717 db1_printf(("b_bcount is zero..\n"));
718 goto done;
719 }
720
721 /*
722 * Do bounds checking and adjust transfer. If there's an
723 * error, the bounds check will flag that for us.
724 */
725
726 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
727 if (DISKPART(bp->b_dev) == RAW_PART) {
728 uint64_t size; /* device size in DEV_BSIZE unit */
729
730 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
731 size = raidPtr->totalSectors <<
732 (raidPtr->logBytesPerSector - DEV_BSHIFT);
733 } else {
734 size = raidPtr->totalSectors >>
735 (DEV_BSHIFT - raidPtr->logBytesPerSector);
736 }
737 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
738 goto done;
739 }
740 } else {
741 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
742 db1_printf(("Bounds check failed!!:%d %d\n",
743 (int) bp->b_blkno, (int) wlabel));
744 goto done;
745 }
746 }
747 s = splbio();
748
749 bp->b_resid = 0;
750
751 /* stuff it onto our queue */
752 BUFQ_PUT(rs->buf_queue, bp);
753
754 /* scheduled the IO to happen at the next convenient time */
755 wakeup(&(raidPtrs[raidID]->iodone));
756
757 splx(s);
758 return;
759
760 done:
761 bp->b_resid = bp->b_bcount;
762 biodone(bp);
763 }
764 /* ARGSUSED */
765 int
766 raidread(dev_t dev, struct uio *uio, int flags)
767 {
768 int unit = raidunit(dev);
769 struct raid_softc *rs;
770
771 if (unit >= numraid)
772 return (ENXIO);
773 rs = &raid_softc[unit];
774
775 if ((rs->sc_flags & RAIDF_INITED) == 0)
776 return (ENXIO);
777
778 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
779
780 }
781 /* ARGSUSED */
782 int
783 raidwrite(dev_t dev, struct uio *uio, int flags)
784 {
785 int unit = raidunit(dev);
786 struct raid_softc *rs;
787
788 if (unit >= numraid)
789 return (ENXIO);
790 rs = &raid_softc[unit];
791
792 if ((rs->sc_flags & RAIDF_INITED) == 0)
793 return (ENXIO);
794
795 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
796
797 }
798
799 int
800 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
801 {
802 int unit = raidunit(dev);
803 int error = 0;
804 int part, pmask;
805 struct cfdata *cf;
806 struct raid_softc *rs;
807 RF_Config_t *k_cfg, *u_cfg;
808 RF_Raid_t *raidPtr;
809 RF_RaidDisk_t *diskPtr;
810 RF_AccTotals_t *totals;
811 RF_DeviceConfig_t *d_cfg, **ucfgp;
812 u_char *specific_buf;
813 int retcode = 0;
814 int column;
815 int raidid;
816 struct rf_recon_req *rrcopy, *rr;
817 RF_ComponentLabel_t *clabel;
818 RF_ComponentLabel_t *ci_label;
819 RF_ComponentLabel_t **clabel_ptr;
820 RF_SingleComponent_t *sparePtr,*componentPtr;
821 RF_SingleComponent_t component;
822 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
823 int i, j, d;
824 #ifdef __HAVE_OLD_DISKLABEL
825 struct disklabel newlabel;
826 #endif
827 struct dkwedge_info *dkw;
828
829 if (unit >= numraid)
830 return (ENXIO);
831 rs = &raid_softc[unit];
832 raidPtr = raidPtrs[unit];
833
834 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
835 (int) DISKPART(dev), (int) unit, (int) cmd));
836
837 /* Must be open for writes for these commands... */
838 switch (cmd) {
839 #ifdef DIOCGSECTORSIZE
840 case DIOCGSECTORSIZE:
841 *(u_int *)data = raidPtr->bytesPerSector;
842 return 0;
843 case DIOCGMEDIASIZE:
844 *(off_t *)data =
845 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
846 return 0;
847 #endif
848 case DIOCSDINFO:
849 case DIOCWDINFO:
850 #ifdef __HAVE_OLD_DISKLABEL
851 case ODIOCWDINFO:
852 case ODIOCSDINFO:
853 #endif
854 case DIOCWLABEL:
855 case DIOCAWEDGE:
856 case DIOCDWEDGE:
857 if ((flag & FWRITE) == 0)
858 return (EBADF);
859 }
860
861 /* Must be initialized for these... */
862 switch (cmd) {
863 case DIOCGDINFO:
864 case DIOCSDINFO:
865 case DIOCWDINFO:
866 #ifdef __HAVE_OLD_DISKLABEL
867 case ODIOCGDINFO:
868 case ODIOCWDINFO:
869 case ODIOCSDINFO:
870 case ODIOCGDEFLABEL:
871 #endif
872 case DIOCGPART:
873 case DIOCWLABEL:
874 case DIOCGDEFLABEL:
875 case DIOCAWEDGE:
876 case DIOCDWEDGE:
877 case DIOCLWEDGES:
878 case RAIDFRAME_SHUTDOWN:
879 case RAIDFRAME_REWRITEPARITY:
880 case RAIDFRAME_GET_INFO:
881 case RAIDFRAME_RESET_ACCTOTALS:
882 case RAIDFRAME_GET_ACCTOTALS:
883 case RAIDFRAME_KEEP_ACCTOTALS:
884 case RAIDFRAME_GET_SIZE:
885 case RAIDFRAME_FAIL_DISK:
886 case RAIDFRAME_COPYBACK:
887 case RAIDFRAME_CHECK_RECON_STATUS:
888 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
889 case RAIDFRAME_GET_COMPONENT_LABEL:
890 case RAIDFRAME_SET_COMPONENT_LABEL:
891 case RAIDFRAME_ADD_HOT_SPARE:
892 case RAIDFRAME_REMOVE_HOT_SPARE:
893 case RAIDFRAME_INIT_LABELS:
894 case RAIDFRAME_REBUILD_IN_PLACE:
895 case RAIDFRAME_CHECK_PARITY:
896 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
897 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
898 case RAIDFRAME_CHECK_COPYBACK_STATUS:
899 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
900 case RAIDFRAME_SET_AUTOCONFIG:
901 case RAIDFRAME_SET_ROOT:
902 case RAIDFRAME_DELETE_COMPONENT:
903 case RAIDFRAME_INCORPORATE_HOT_SPARE:
904 if ((rs->sc_flags & RAIDF_INITED) == 0)
905 return (ENXIO);
906 }
907
908 switch (cmd) {
909
910 /* configure the system */
911 case RAIDFRAME_CONFIGURE:
912
913 if (raidPtr->valid) {
914 /* There is a valid RAID set running on this unit! */
915 printf("raid%d: Device already configured!\n",unit);
916 return(EINVAL);
917 }
918
919 /* copy-in the configuration information */
920 /* data points to a pointer to the configuration structure */
921
922 u_cfg = *((RF_Config_t **) data);
923 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
924 if (k_cfg == NULL) {
925 return (ENOMEM);
926 }
927 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
928 if (retcode) {
929 RF_Free(k_cfg, sizeof(RF_Config_t));
930 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
931 retcode));
932 return (retcode);
933 }
934 /* allocate a buffer for the layout-specific data, and copy it
935 * in */
936 if (k_cfg->layoutSpecificSize) {
937 if (k_cfg->layoutSpecificSize > 10000) {
938 /* sanity check */
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940 return (EINVAL);
941 }
942 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
943 (u_char *));
944 if (specific_buf == NULL) {
945 RF_Free(k_cfg, sizeof(RF_Config_t));
946 return (ENOMEM);
947 }
948 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
949 k_cfg->layoutSpecificSize);
950 if (retcode) {
951 RF_Free(k_cfg, sizeof(RF_Config_t));
952 RF_Free(specific_buf,
953 k_cfg->layoutSpecificSize);
954 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
955 retcode));
956 return (retcode);
957 }
958 } else
959 specific_buf = NULL;
960 k_cfg->layoutSpecific = specific_buf;
961
962 /* should do some kind of sanity check on the configuration.
963 * Store the sum of all the bytes in the last byte? */
964
965 /* configure the system */
966
967 /*
968 * Clear the entire RAID descriptor, just to make sure
969 * there is no stale data left in the case of a
970 * reconfiguration
971 */
972 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
973 raidPtr->raidid = unit;
974
975 retcode = rf_Configure(raidPtr, k_cfg, NULL);
976
977 if (retcode == 0) {
978
979 /* allow this many simultaneous IO's to
980 this RAID device */
981 raidPtr->openings = RAIDOUTSTANDING;
982
983 raidinit(raidPtr);
984 rf_markalldirty(raidPtr);
985 }
986 /* free the buffers. No return code here. */
987 if (k_cfg->layoutSpecificSize) {
988 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
989 }
990 RF_Free(k_cfg, sizeof(RF_Config_t));
991
992 return (retcode);
993
994 /* shutdown the system */
995 case RAIDFRAME_SHUTDOWN:
996
997 if ((error = raidlock(rs)) != 0)
998 return (error);
999
1000 /*
1001 * If somebody has a partition mounted, we shouldn't
1002 * shutdown.
1003 */
1004
1005 part = DISKPART(dev);
1006 pmask = (1 << part);
1007 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1008 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1009 (rs->sc_dkdev.dk_copenmask & pmask))) {
1010 raidunlock(rs);
1011 return (EBUSY);
1012 }
1013
1014 retcode = rf_Shutdown(raidPtr);
1015
1016 /* It's no longer initialized... */
1017 rs->sc_flags &= ~RAIDF_INITED;
1018
1019 /* free the pseudo device attach bits */
1020
1021 cf = device_cfdata(rs->sc_dev);
1022 /* XXX this causes us to not return any errors
1023 from the above call to rf_Shutdown() */
1024 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1025 free(cf, M_RAIDFRAME);
1026
1027 /* Detach the disk. */
1028 pseudo_disk_detach(&rs->sc_dkdev);
1029
1030 raidunlock(rs);
1031
1032 return (retcode);
1033 case RAIDFRAME_GET_COMPONENT_LABEL:
1034 clabel_ptr = (RF_ComponentLabel_t **) data;
1035 /* need to read the component label for the disk indicated
1036 by row,column in clabel */
1037
1038 /* For practice, let's get it directly fromdisk, rather
1039 than from the in-core copy */
1040 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1041 (RF_ComponentLabel_t *));
1042 if (clabel == NULL)
1043 return (ENOMEM);
1044
1045 retcode = copyin( *clabel_ptr, clabel,
1046 sizeof(RF_ComponentLabel_t));
1047
1048 if (retcode) {
1049 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1050 return(retcode);
1051 }
1052
1053 clabel->row = 0; /* Don't allow looking at anything else.*/
1054
1055 column = clabel->column;
1056
1057 if ((column < 0) || (column >= raidPtr->numCol +
1058 raidPtr->numSpare)) {
1059 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1060 return(EINVAL);
1061 }
1062
1063 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1064 raidPtr->raid_cinfo[column].ci_vp,
1065 clabel );
1066
1067 if (retcode == 0) {
1068 retcode = copyout(clabel, *clabel_ptr,
1069 sizeof(RF_ComponentLabel_t));
1070 }
1071 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1072 return (retcode);
1073
1074 case RAIDFRAME_SET_COMPONENT_LABEL:
1075 clabel = (RF_ComponentLabel_t *) data;
1076
1077 /* XXX check the label for valid stuff... */
1078 /* Note that some things *should not* get modified --
1079 the user should be re-initing the labels instead of
1080 trying to patch things.
1081 */
1082
1083 raidid = raidPtr->raidid;
1084 #if DEBUG
1085 printf("raid%d: Got component label:\n", raidid);
1086 printf("raid%d: Version: %d\n", raidid, clabel->version);
1087 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1088 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1089 printf("raid%d: Column: %d\n", raidid, clabel->column);
1090 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1091 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1092 printf("raid%d: Status: %d\n", raidid, clabel->status);
1093 #endif
1094 clabel->row = 0;
1095 column = clabel->column;
1096
1097 if ((column < 0) || (column >= raidPtr->numCol)) {
1098 return(EINVAL);
1099 }
1100
1101 /* XXX this isn't allowed to do anything for now :-) */
1102
1103 /* XXX and before it is, we need to fill in the rest
1104 of the fields!?!?!?! */
1105 #if 0
1106 raidwrite_component_label(
1107 raidPtr->Disks[column].dev,
1108 raidPtr->raid_cinfo[column].ci_vp,
1109 clabel );
1110 #endif
1111 return (0);
1112
1113 case RAIDFRAME_INIT_LABELS:
1114 clabel = (RF_ComponentLabel_t *) data;
1115 /*
1116 we only want the serial number from
1117 the above. We get all the rest of the information
1118 from the config that was used to create this RAID
1119 set.
1120 */
1121
1122 raidPtr->serial_number = clabel->serial_number;
1123
1124 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1125 (RF_ComponentLabel_t *));
1126 if (ci_label == NULL)
1127 return (ENOMEM);
1128
1129 raid_init_component_label(raidPtr, ci_label);
1130 ci_label->serial_number = clabel->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */
1132
1133 for(column=0;column<raidPtr->numCol;column++) {
1134 diskPtr = &raidPtr->Disks[column];
1135 if (!RF_DEAD_DISK(diskPtr->status)) {
1136 ci_label->partitionSize = diskPtr->partitionSize;
1137 ci_label->column = column;
1138 raidwrite_component_label(
1139 raidPtr->Disks[column].dev,
1140 raidPtr->raid_cinfo[column].ci_vp,
1141 ci_label );
1142 }
1143 }
1144 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1145
1146 return (retcode);
1147 case RAIDFRAME_SET_AUTOCONFIG:
1148 d = rf_set_autoconfig(raidPtr, *(int *) data);
1149 printf("raid%d: New autoconfig value is: %d\n",
1150 raidPtr->raidid, d);
1151 *(int *) data = d;
1152 return (retcode);
1153
1154 case RAIDFRAME_SET_ROOT:
1155 d = rf_set_rootpartition(raidPtr, *(int *) data);
1156 printf("raid%d: New rootpartition value is: %d\n",
1157 raidPtr->raidid, d);
1158 *(int *) data = d;
1159 return (retcode);
1160
1161 /* initialize all parity */
1162 case RAIDFRAME_REWRITEPARITY:
1163
1164 if (raidPtr->Layout.map->faultsTolerated == 0) {
1165 /* Parity for RAID 0 is trivially correct */
1166 raidPtr->parity_good = RF_RAID_CLEAN;
1167 return(0);
1168 }
1169
1170 if (raidPtr->parity_rewrite_in_progress == 1) {
1171 /* Re-write is already in progress! */
1172 return(EINVAL);
1173 }
1174
1175 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1176 rf_RewriteParityThread,
1177 raidPtr,"raid_parity");
1178 return (retcode);
1179
1180
1181 case RAIDFRAME_ADD_HOT_SPARE:
1182 sparePtr = (RF_SingleComponent_t *) data;
1183 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1184 retcode = rf_add_hot_spare(raidPtr, &component);
1185 return(retcode);
1186
1187 case RAIDFRAME_REMOVE_HOT_SPARE:
1188 return(retcode);
1189
1190 case RAIDFRAME_DELETE_COMPONENT:
1191 componentPtr = (RF_SingleComponent_t *)data;
1192 memcpy( &component, componentPtr,
1193 sizeof(RF_SingleComponent_t));
1194 retcode = rf_delete_component(raidPtr, &component);
1195 return(retcode);
1196
1197 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1198 componentPtr = (RF_SingleComponent_t *)data;
1199 memcpy( &component, componentPtr,
1200 sizeof(RF_SingleComponent_t));
1201 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1202 return(retcode);
1203
1204 case RAIDFRAME_REBUILD_IN_PLACE:
1205
1206 if (raidPtr->Layout.map->faultsTolerated == 0) {
1207 /* Can't do this on a RAID 0!! */
1208 return(EINVAL);
1209 }
1210
1211 if (raidPtr->recon_in_progress == 1) {
1212 /* a reconstruct is already in progress! */
1213 return(EINVAL);
1214 }
1215
1216 componentPtr = (RF_SingleComponent_t *) data;
1217 memcpy( &component, componentPtr,
1218 sizeof(RF_SingleComponent_t));
1219 component.row = 0; /* we don't support any more */
1220 column = component.column;
1221
1222 if ((column < 0) || (column >= raidPtr->numCol)) {
1223 return(EINVAL);
1224 }
1225
1226 RF_LOCK_MUTEX(raidPtr->mutex);
1227 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1228 (raidPtr->numFailures > 0)) {
1229 /* XXX 0 above shouldn't be constant!!! */
1230 /* some component other than this has failed.
1231 Let's not make things worse than they already
1232 are... */
1233 printf("raid%d: Unable to reconstruct to disk at:\n",
1234 raidPtr->raidid);
1235 printf("raid%d: Col: %d Too many failures.\n",
1236 raidPtr->raidid, column);
1237 RF_UNLOCK_MUTEX(raidPtr->mutex);
1238 return (EINVAL);
1239 }
1240 if (raidPtr->Disks[column].status ==
1241 rf_ds_reconstructing) {
1242 printf("raid%d: Unable to reconstruct to disk at:\n",
1243 raidPtr->raidid);
1244 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1245
1246 RF_UNLOCK_MUTEX(raidPtr->mutex);
1247 return (EINVAL);
1248 }
1249 if (raidPtr->Disks[column].status == rf_ds_spared) {
1250 RF_UNLOCK_MUTEX(raidPtr->mutex);
1251 return (EINVAL);
1252 }
1253 RF_UNLOCK_MUTEX(raidPtr->mutex);
1254
1255 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1256 if (rrcopy == NULL)
1257 return(ENOMEM);
1258
1259 rrcopy->raidPtr = (void *) raidPtr;
1260 rrcopy->col = column;
1261
1262 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1263 rf_ReconstructInPlaceThread,
1264 rrcopy,"raid_reconip");
1265 return(retcode);
1266
1267 case RAIDFRAME_GET_INFO:
1268 if (!raidPtr->valid)
1269 return (ENODEV);
1270 ucfgp = (RF_DeviceConfig_t **) data;
1271 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1272 (RF_DeviceConfig_t *));
1273 if (d_cfg == NULL)
1274 return (ENOMEM);
1275 d_cfg->rows = 1; /* there is only 1 row now */
1276 d_cfg->cols = raidPtr->numCol;
1277 d_cfg->ndevs = raidPtr->numCol;
1278 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1279 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1280 return (ENOMEM);
1281 }
1282 d_cfg->nspares = raidPtr->numSpare;
1283 if (d_cfg->nspares >= RF_MAX_DISKS) {
1284 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1285 return (ENOMEM);
1286 }
1287 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1288 d = 0;
1289 for (j = 0; j < d_cfg->cols; j++) {
1290 d_cfg->devs[d] = raidPtr->Disks[j];
1291 d++;
1292 }
1293 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1294 d_cfg->spares[i] = raidPtr->Disks[j];
1295 }
1296 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1297 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1298
1299 return (retcode);
1300
1301 case RAIDFRAME_CHECK_PARITY:
1302 *(int *) data = raidPtr->parity_good;
1303 return (0);
1304
1305 case RAIDFRAME_RESET_ACCTOTALS:
1306 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1307 return (0);
1308
1309 case RAIDFRAME_GET_ACCTOTALS:
1310 totals = (RF_AccTotals_t *) data;
1311 *totals = raidPtr->acc_totals;
1312 return (0);
1313
1314 case RAIDFRAME_KEEP_ACCTOTALS:
1315 raidPtr->keep_acc_totals = *(int *)data;
1316 return (0);
1317
1318 case RAIDFRAME_GET_SIZE:
1319 *(int *) data = raidPtr->totalSectors;
1320 return (0);
1321
1322 /* fail a disk & optionally start reconstruction */
1323 case RAIDFRAME_FAIL_DISK:
1324
1325 if (raidPtr->Layout.map->faultsTolerated == 0) {
1326 /* Can't do this on a RAID 0!! */
1327 return(EINVAL);
1328 }
1329
1330 rr = (struct rf_recon_req *) data;
1331 rr->row = 0;
1332 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1333 return (EINVAL);
1334
1335
1336 RF_LOCK_MUTEX(raidPtr->mutex);
1337 if (raidPtr->status == rf_rs_reconstructing) {
1338 /* you can't fail a disk while we're reconstructing! */
1339 /* XXX wrong for RAID6 */
1340 RF_UNLOCK_MUTEX(raidPtr->mutex);
1341 return (EINVAL);
1342 }
1343 if ((raidPtr->Disks[rr->col].status ==
1344 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1345 /* some other component has failed. Let's not make
1346 things worse. XXX wrong for RAID6 */
1347 RF_UNLOCK_MUTEX(raidPtr->mutex);
1348 return (EINVAL);
1349 }
1350 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1351 /* Can't fail a spared disk! */
1352 RF_UNLOCK_MUTEX(raidPtr->mutex);
1353 return (EINVAL);
1354 }
1355 RF_UNLOCK_MUTEX(raidPtr->mutex);
1356
1357 /* make a copy of the recon request so that we don't rely on
1358 * the user's buffer */
1359 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1360 if (rrcopy == NULL)
1361 return(ENOMEM);
1362 memcpy(rrcopy, rr, sizeof(*rr));
1363 rrcopy->raidPtr = (void *) raidPtr;
1364
1365 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1366 rf_ReconThread,
1367 rrcopy,"raid_recon");
1368 return (0);
1369
1370 /* invoke a copyback operation after recon on whatever disk
1371 * needs it, if any */
1372 case RAIDFRAME_COPYBACK:
1373
1374 if (raidPtr->Layout.map->faultsTolerated == 0) {
1375 /* This makes no sense on a RAID 0!! */
1376 return(EINVAL);
1377 }
1378
1379 if (raidPtr->copyback_in_progress == 1) {
1380 /* Copyback is already in progress! */
1381 return(EINVAL);
1382 }
1383
1384 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1385 rf_CopybackThread,
1386 raidPtr,"raid_copyback");
1387 return (retcode);
1388
1389 /* return the percentage completion of reconstruction */
1390 case RAIDFRAME_CHECK_RECON_STATUS:
1391 if (raidPtr->Layout.map->faultsTolerated == 0) {
1392 /* This makes no sense on a RAID 0, so tell the
1393 user it's done. */
1394 *(int *) data = 100;
1395 return(0);
1396 }
1397 if (raidPtr->status != rf_rs_reconstructing)
1398 *(int *) data = 100;
1399 else {
1400 if (raidPtr->reconControl->numRUsTotal > 0) {
1401 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1402 } else {
1403 *(int *) data = 0;
1404 }
1405 }
1406 return (0);
1407 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1408 progressInfoPtr = (RF_ProgressInfo_t **) data;
1409 if (raidPtr->status != rf_rs_reconstructing) {
1410 progressInfo.remaining = 0;
1411 progressInfo.completed = 100;
1412 progressInfo.total = 100;
1413 } else {
1414 progressInfo.total =
1415 raidPtr->reconControl->numRUsTotal;
1416 progressInfo.completed =
1417 raidPtr->reconControl->numRUsComplete;
1418 progressInfo.remaining = progressInfo.total -
1419 progressInfo.completed;
1420 }
1421 retcode = copyout(&progressInfo, *progressInfoPtr,
1422 sizeof(RF_ProgressInfo_t));
1423 return (retcode);
1424
1425 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1426 if (raidPtr->Layout.map->faultsTolerated == 0) {
1427 /* This makes no sense on a RAID 0, so tell the
1428 user it's done. */
1429 *(int *) data = 100;
1430 return(0);
1431 }
1432 if (raidPtr->parity_rewrite_in_progress == 1) {
1433 *(int *) data = 100 *
1434 raidPtr->parity_rewrite_stripes_done /
1435 raidPtr->Layout.numStripe;
1436 } else {
1437 *(int *) data = 100;
1438 }
1439 return (0);
1440
1441 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1442 progressInfoPtr = (RF_ProgressInfo_t **) data;
1443 if (raidPtr->parity_rewrite_in_progress == 1) {
1444 progressInfo.total = raidPtr->Layout.numStripe;
1445 progressInfo.completed =
1446 raidPtr->parity_rewrite_stripes_done;
1447 progressInfo.remaining = progressInfo.total -
1448 progressInfo.completed;
1449 } else {
1450 progressInfo.remaining = 0;
1451 progressInfo.completed = 100;
1452 progressInfo.total = 100;
1453 }
1454 retcode = copyout(&progressInfo, *progressInfoPtr,
1455 sizeof(RF_ProgressInfo_t));
1456 return (retcode);
1457
1458 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1459 if (raidPtr->Layout.map->faultsTolerated == 0) {
1460 /* This makes no sense on a RAID 0 */
1461 *(int *) data = 100;
1462 return(0);
1463 }
1464 if (raidPtr->copyback_in_progress == 1) {
1465 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1466 raidPtr->Layout.numStripe;
1467 } else {
1468 *(int *) data = 100;
1469 }
1470 return (0);
1471
1472 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1473 progressInfoPtr = (RF_ProgressInfo_t **) data;
1474 if (raidPtr->copyback_in_progress == 1) {
1475 progressInfo.total = raidPtr->Layout.numStripe;
1476 progressInfo.completed =
1477 raidPtr->copyback_stripes_done;
1478 progressInfo.remaining = progressInfo.total -
1479 progressInfo.completed;
1480 } else {
1481 progressInfo.remaining = 0;
1482 progressInfo.completed = 100;
1483 progressInfo.total = 100;
1484 }
1485 retcode = copyout(&progressInfo, *progressInfoPtr,
1486 sizeof(RF_ProgressInfo_t));
1487 return (retcode);
1488
1489 /* the sparetable daemon calls this to wait for the kernel to
1490 * need a spare table. this ioctl does not return until a
1491 * spare table is needed. XXX -- calling mpsleep here in the
1492 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1493 * -- I should either compute the spare table in the kernel,
1494 * or have a different -- XXX XXX -- interface (a different
1495 * character device) for delivering the table -- XXX */
1496 #if 0
1497 case RAIDFRAME_SPARET_WAIT:
1498 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1499 while (!rf_sparet_wait_queue)
1500 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1501 waitreq = rf_sparet_wait_queue;
1502 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1503 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1504
1505 /* structure assignment */
1506 *((RF_SparetWait_t *) data) = *waitreq;
1507
1508 RF_Free(waitreq, sizeof(*waitreq));
1509 return (0);
1510
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1513 case RAIDFRAME_ABORT_SPARET_WAIT:
1514 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1515 waitreq->fcol = -1;
1516 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1517 waitreq->next = rf_sparet_wait_queue;
1518 rf_sparet_wait_queue = waitreq;
1519 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1520 wakeup(&rf_sparet_wait_queue);
1521 return (0);
1522
1523 /* used by the spare table daemon to deliver a spare table
1524 * into the kernel */
1525 case RAIDFRAME_SEND_SPARET:
1526
1527 /* install the spare table */
1528 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1529
1530 /* respond to the requestor. the return status of the spare
1531 * table installation is passed in the "fcol" field */
1532 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1533 waitreq->fcol = retcode;
1534 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1535 waitreq->next = rf_sparet_resp_queue;
1536 rf_sparet_resp_queue = waitreq;
1537 wakeup(&rf_sparet_resp_queue);
1538 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1539
1540 return (retcode);
1541 #endif
1542
1543 default:
1544 break; /* fall through to the os-specific code below */
1545
1546 }
1547
1548 if (!raidPtr->valid)
1549 return (EINVAL);
1550
1551 /*
1552 * Add support for "regular" device ioctls here.
1553 */
1554
1555 switch (cmd) {
1556 case DIOCGDINFO:
1557 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1558 break;
1559 #ifdef __HAVE_OLD_DISKLABEL
1560 case ODIOCGDINFO:
1561 newlabel = *(rs->sc_dkdev.dk_label);
1562 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1563 return ENOTTY;
1564 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1565 break;
1566 #endif
1567
1568 case DIOCGPART:
1569 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1570 ((struct partinfo *) data)->part =
1571 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1572 break;
1573
1574 case DIOCWDINFO:
1575 case DIOCSDINFO:
1576 #ifdef __HAVE_OLD_DISKLABEL
1577 case ODIOCWDINFO:
1578 case ODIOCSDINFO:
1579 #endif
1580 {
1581 struct disklabel *lp;
1582 #ifdef __HAVE_OLD_DISKLABEL
1583 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1584 memset(&newlabel, 0, sizeof newlabel);
1585 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1586 lp = &newlabel;
1587 } else
1588 #endif
1589 lp = (struct disklabel *)data;
1590
1591 if ((error = raidlock(rs)) != 0)
1592 return (error);
1593
1594 rs->sc_flags |= RAIDF_LABELLING;
1595
1596 error = setdisklabel(rs->sc_dkdev.dk_label,
1597 lp, 0, rs->sc_dkdev.dk_cpulabel);
1598 if (error == 0) {
1599 if (cmd == DIOCWDINFO
1600 #ifdef __HAVE_OLD_DISKLABEL
1601 || cmd == ODIOCWDINFO
1602 #endif
1603 )
1604 error = writedisklabel(RAIDLABELDEV(dev),
1605 raidstrategy, rs->sc_dkdev.dk_label,
1606 rs->sc_dkdev.dk_cpulabel);
1607 }
1608 rs->sc_flags &= ~RAIDF_LABELLING;
1609
1610 raidunlock(rs);
1611
1612 if (error)
1613 return (error);
1614 break;
1615 }
1616
1617 case DIOCWLABEL:
1618 if (*(int *) data != 0)
1619 rs->sc_flags |= RAIDF_WLABEL;
1620 else
1621 rs->sc_flags &= ~RAIDF_WLABEL;
1622 break;
1623
1624 case DIOCGDEFLABEL:
1625 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1626 break;
1627
1628 #ifdef __HAVE_OLD_DISKLABEL
1629 case ODIOCGDEFLABEL:
1630 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1631 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1632 return ENOTTY;
1633 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1634 break;
1635 #endif
1636
1637 case DIOCAWEDGE:
1638 case DIOCDWEDGE:
1639 dkw = (void *)data;
1640
1641 /* If the ioctl happens here, the parent is us. */
1642 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1643 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1644
1645 case DIOCLWEDGES:
1646 return dkwedge_list(&rs->sc_dkdev,
1647 (struct dkwedge_list *)data, l);
1648
1649 default:
1650 retcode = ENOTTY;
1651 }
1652 return (retcode);
1653
1654 }
1655
1656
1657 /* raidinit -- complete the rest of the initialization for the
1658 RAIDframe device. */
1659
1660
1661 static void
1662 raidinit(RF_Raid_t *raidPtr)
1663 {
1664 struct cfdata *cf;
1665 struct raid_softc *rs;
1666 int unit;
1667
1668 unit = raidPtr->raidid;
1669
1670 rs = &raid_softc[unit];
1671
1672 /* XXX should check return code first... */
1673 rs->sc_flags |= RAIDF_INITED;
1674
1675 /* XXX doesn't check bounds. */
1676 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1677
1678 rs->sc_dkdev.dk_name = rs->sc_xname;
1679
1680 /* attach the pseudo device */
1681 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1682 cf->cf_name = raid_cd.cd_name;
1683 cf->cf_atname = raid_cd.cd_name;
1684 cf->cf_unit = unit;
1685 cf->cf_fstate = FSTATE_STAR;
1686
1687 rs->sc_dev = config_attach_pseudo(cf);
1688
1689 if (rs->sc_dev==NULL) {
1690 printf("raid%d: config_attach_pseudo failed\n",
1691 raidPtr->raidid);
1692 }
1693
1694 /* disk_attach actually creates space for the CPU disklabel, among
1695 * other things, so it's critical to call this *BEFORE* we try putzing
1696 * with disklabels. */
1697
1698 disk_attach(&rs->sc_dkdev);
1699
1700 /* XXX There may be a weird interaction here between this, and
1701 * protectedSectors, as used in RAIDframe. */
1702
1703 rs->sc_size = raidPtr->totalSectors;
1704 }
1705 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1706 /* wake up the daemon & tell it to get us a spare table
1707 * XXX
1708 * the entries in the queues should be tagged with the raidPtr
1709 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1711 * XXX
1712 *
1713 * XXX This code is not currently used. GO
1714 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Hand the request to the user-level daemon: queue it on the
	 * wait queue and wake any RAIDFRAME_SPARET_WAIT sleeper. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above dates from the OSF port;
	 * tsleep(9) does not release rf_sparet_wait_mutex here —
	 * TODO confirm the locking around this (currently unused) path. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Dequeue the daemon's response; its fcol field carries the
	 * installation status from RAIDFRAME_SEND_SPARET. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1739 #endif
1740
1741 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1742 * bp & passes it down.
1743 * any calls originating in the kernel must use non-blocking I/O
1744 * do some extra sanity checking to return "appropriate" error values for
1745 * certain conditions (to make some standard utilities work)
1746 *
1747 * Formerly known as: rf_DoAccessKernel
1748 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex while updating labels; the update path
		 * takes its own locks. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released immediately after the openings test. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 if the request has a partial trailing sector;
		 * sum is the end of the request in array sectors.  The
		 * (sum < x) comparisons below catch unsigned wraparound. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* The "1 ||" forces this branch; db1_printf() is compiled
		 * away unless debugging is enabled, so it costs nothing. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			/* Request runs past the end of the array (or
			 * wrapped): fail it and move on to the next one. */
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening; it is returned elsewhere when the
		 * access completes. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess() failed synchronously: fail the buf
			 * but keep dispatching further queued requests. */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1872
1873
1874
1875
1876 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1877
1878 int
1879 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1880 {
1881 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1882 struct buf *bp;
1883
1884 req->queue = queue;
1885
1886 #if DIAGNOSTIC
1887 if (queue->raidPtr->raidid >= numraid) {
1888 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1889 numraid);
1890 panic("Invalid Unit number in rf_DispatchKernelIO");
1891 }
1892 #endif
1893
1894 bp = req->bp;
1895
1896 switch (req->type) {
1897 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1898 /* XXX need to do something extra here.. */
1899 /* I'm leaving this in, as I've never actually seen it used,
1900 * and I'd like folks to report it... GO */
1901 printf(("WAKEUP CALLED\n"));
1902 queue->numOutstanding++;
1903
1904 bp->b_flags = 0;
1905 bp->b_private = req;
1906
1907 KernelWakeupFunc(bp);
1908 break;
1909
1910 case RF_IO_TYPE_READ:
1911 case RF_IO_TYPE_WRITE:
1912 #if RF_ACC_TRACE > 0
1913 if (req->tracerec) {
1914 RF_ETIMER_START(req->tracerec->timer);
1915 }
1916 #endif
1917 InitBP(bp, queue->rf_cinfo->ci_vp,
1918 op, queue->rf_cinfo->ci_dev,
1919 req->sectorOffset, req->numSector,
1920 req->buf, KernelWakeupFunc, (void *) req,
1921 queue->raidPtr->logBytesPerSector, req->b_proc);
1922
1923 if (rf_debugKernelAccess) {
1924 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1925 (long) bp->b_blkno));
1926 }
1927 queue->numOutstanding++;
1928 queue->last_deq_sector = req->sectorOffset;
1929 /* acc wouldn't have been let in if there were any pending
1930 * reqs at any other priority */
1931 queue->curPriority = req->priority;
1932
1933 db1_printf(("Going for %c to unit %d col %d\n",
1934 req->type, queue->raidPtr->raidid,
1935 queue->col));
1936 db1_printf(("sector %d count %d (%d bytes) %d\n",
1937 (int) req->sectorOffset, (int) req->numSector,
1938 (int) (req->numSector <<
1939 queue->raidPtr->logBytesPerSector),
1940 (int) queue->raidPtr->logBytesPerSector));
1941 VOP_STRATEGY(bp->b_vp, bp);
1942
1943 break;
1944
1945 default:
1946 panic("bad req->type in rf_DispatchKernelIO");
1947 }
1948 db1_printf(("Exiting from DispatchKernelIO\n"));
1949
1950 return (0);
1951 }
1952 /* this is the callback function associated with a I/O invoked from
1953 kernel code.
1954 */
/*
 * Completion callback for component I/O issued by rf_DispatchKernelIO().
 * Runs at biodone time: records trace timing, marks the component failed
 * on I/O error (when the set can tolerate it), and hands the finished
 * request to the raidio thread via the raidPtr->iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The originating request was stashed in b_private by InitBP()
	 * (or directly for NOPs in rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2024
2025
2026
2027 /*
2028 * initialize a buf structure for doing an I/O in the kernel.
2029 */
2030 static void
2031 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2032 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2033 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2034 struct proc *b_proc)
2035 {
2036 /* bp->b_flags = B_PHYS | rw_flag; */
2037 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2038 bp->b_bcount = numSect << logBytesPerSector;
2039 bp->b_bufsize = bp->b_bcount;
2040 bp->b_error = 0;
2041 bp->b_dev = dev;
2042 bp->b_data = bf;
2043 bp->b_blkno = startSect;
2044 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2045 if (bp->b_bcount == 0) {
2046 panic("bp->b_bcount is zero in InitBP!!");
2047 }
2048 bp->b_proc = b_proc;
2049 bp->b_iodone = cbFunc;
2050 bp->b_private = cbArg;
2051 bp->b_vp = b_vp;
2052 if ((bp->b_flags & B_READ) == 0) {
2053 bp->b_vp->v_numoutput++;
2054 }
2055
2056 }
2057
2058 static void
2059 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2060 struct disklabel *lp)
2061 {
2062 memset(lp, 0, sizeof(*lp));
2063
2064 /* fabricate a label... */
2065 lp->d_secperunit = raidPtr->totalSectors;
2066 lp->d_secsize = raidPtr->bytesPerSector;
2067 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2068 lp->d_ntracks = 4 * raidPtr->numCol;
2069 lp->d_ncylinders = raidPtr->totalSectors /
2070 (lp->d_nsectors * lp->d_ntracks);
2071 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2072
2073 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2074 lp->d_type = DTYPE_RAID;
2075 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2076 lp->d_rpm = 3600;
2077 lp->d_interleave = 1;
2078 lp->d_flags = 0;
2079
2080 lp->d_partitions[RAW_PART].p_offset = 0;
2081 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2082 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2083 lp->d_npartitions = RAW_PART + 1;
2084
2085 lp->d_magic = DISKMAGIC;
2086 lp->d_magic2 = DISKMAGIC;
2087 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2088
2089 }
2090 /*
2091 * Read the disklabel from the raid device. If one is not present, fake one
2092 * up.
2093 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default so every field is sane even
	 * if no on-disk label is found. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No usable on-disk label: synthesize one. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may be used
		 * if that is found.
		 */
		/* NOTE(review): d_secperunit is printed with %d; if it is
		 * a 32-bit unsigned field this can misprint very large
		 * arrays — TODO confirm field type. */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about (but do not reject) partitions that extend
		 * past the end of the array. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2146 /*
2147 * Take care of things one might want to take care of in the event
2148 * that a disklabel isn't present.
2149 */
2150 static void
2151 raidmakedisklabel(struct raid_softc *rs)
2152 {
2153 struct disklabel *lp = rs->sc_dkdev.dk_label;
2154 db1_printf(("Making a label..\n"));
2155
2156 /*
2157 * For historical reasons, if there's no disklabel present
2158 * the raw partition must be marked FS_BSDFFS.
2159 */
2160
2161 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2162
2163 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2164
2165 lp->d_checksum = dkcksum(lp);
2166 }
2167 /*
2168 * Wait interruptibly for an exclusive lock.
2169 *
2170 * XXX
2171 * Several drivers do this; it should be abstracted and made MP-safe.
2172 * (Hmm... where have we seen this warning before :-> GO )
2173 */
2174 static int
2175 raidlock(struct raid_softc *rs)
2176 {
2177 int error;
2178
2179 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2180 rs->sc_flags |= RAIDF_WANTED;
2181 if ((error =
2182 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2183 return (error);
2184 }
2185 rs->sc_flags |= RAIDF_LOCKED;
2186 return (0);
2187 }
2188 /*
2189 * Unlock and wake up any waiters.
2190 */
2191 static void
2192 raidunlock(struct raid_softc *rs)
2193 {
2194
2195 rs->sc_flags &= ~RAIDF_LOCKED;
2196 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2197 rs->sc_flags &= ~RAIDF_WANTED;
2198 wakeup(rs);
2199 }
2200 }
2201
2202
2203 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2204 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2205
2206 int
2207 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2208 {
2209 RF_ComponentLabel_t clabel;
2210 raidread_component_label(dev, b_vp, &clabel);
2211 clabel.mod_counter = mod_counter;
2212 clabel.clean = RF_RAID_CLEAN;
2213 raidwrite_component_label(dev, b_vp, &clabel);
2214 return(0);
2215 }
2216
2217
2218 int
2219 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2220 {
2221 RF_ComponentLabel_t clabel;
2222 raidread_component_label(dev, b_vp, &clabel);
2223 clabel.mod_counter = mod_counter;
2224 clabel.clean = RF_RAID_DIRTY;
2225 raidwrite_component_label(dev, b_vp, &clabel);
2226 return(0);
2227 }
2228
2229 /* ARGSUSED */
2230 int
2231 raidread_component_label(dev_t dev, struct vnode *b_vp,
2232 RF_ComponentLabel_t *clabel)
2233 {
2234 struct buf *bp;
2235 const struct bdevsw *bdev;
2236 int error;
2237
2238 /* XXX should probably ensure that we don't try to do this if
2239 someone has changed rf_protected_sectors. */
2240
2241 if (b_vp == NULL) {
2242 /* For whatever reason, this component is not valid.
2243 Don't try to read a component label from it. */
2244 return(EINVAL);
2245 }
2246
2247 /* get a block of the appropriate size... */
2248 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2249 bp->b_dev = dev;
2250
2251 /* get our ducks in a row for the read */
2252 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2253 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2254 bp->b_flags |= B_READ;
2255 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2256
2257 bdev = bdevsw_lookup(bp->b_dev);
2258 if (bdev == NULL)
2259 return (ENXIO);
2260 (*bdev->d_strategy)(bp);
2261
2262 error = biowait(bp);
2263
2264 if (!error) {
2265 memcpy(clabel, bp->b_data,
2266 sizeof(RF_ComponentLabel_t));
2267 }
2268
2269 brelse(bp);
2270 return(error);
2271 }
2272 /* ARGSUSED */
2273 int
2274 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2275 RF_ComponentLabel_t *clabel)
2276 {
2277 struct buf *bp;
2278 const struct bdevsw *bdev;
2279 int error;
2280
2281 /* get a block of the appropriate size... */
2282 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2283 bp->b_dev = dev;
2284
2285 /* get our ducks in a row for the write */
2286 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2287 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2288 bp->b_flags |= B_WRITE;
2289 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2290
2291 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2292
2293 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2294
2295 bdev = bdevsw_lookup(bp->b_dev);
2296 if (bdev == NULL)
2297 return (ENXIO);
2298 (*bdev->d_strategy)(bp);
2299 error = biowait(bp);
2300 brelse(bp);
2301 if (error) {
2302 #if 1
2303 printf("Failed to write RAID component info!\n");
2304 #endif
2305 }
2306
2307 return(error);
2308 }
2309
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/*
	 * Bump the array's modification counter and mark every live
	 * component (and every in-use spare) dirty on disk.  Dead disks
	 * are never touched.
	 */
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* rebuild the label from current array state,
			   then record which column the spare covers */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2380
2381
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/*
	 * Rewrite the component labels of all optimal columns and in-use
	 * spares with the new modification counter and the unit number we
	 * are currently configured as.  When 'final' is
	 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark
	 * each component clean (shutdown path).
	 */
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;

			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			/* note what unit we are configured as */
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					    raidPtr->Disks[c].dev,
					    raidPtr->raid_cinfo[c].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the data column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* regenerate the label from the current array
			   state, then fill in the spare-specific fields */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2477
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
	/*
	 * Close and release one component vnode.  Autoconfigured
	 * components are closed with NOCRED; others are closed via
	 * vn_close() using the engine thread's credentials and lwp.
	 * A NULL vp is silently ignored.
	 */
	struct proc *p;
	struct lwp *l;

	/* fetched unconditionally, but only used on the vn_close() path */
	p = raidPtr->engine_thread;
	l = LIST_FIRST(&p->p_lwps);

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
		}
	}
}
2498
2499
2500 void
2501 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2502 {
2503 int r,c;
2504 struct vnode *vp;
2505 int acd;
2506
2507
2508 /* We take this opportunity to close the vnodes like we should.. */
2509
2510 for (c = 0; c < raidPtr->numCol; c++) {
2511 vp = raidPtr->raid_cinfo[c].ci_vp;
2512 acd = raidPtr->Disks[c].auto_configured;
2513 rf_close_component(raidPtr, vp, acd);
2514 raidPtr->raid_cinfo[c].ci_vp = NULL;
2515 raidPtr->Disks[c].auto_configured = 0;
2516 }
2517
2518 for (r = 0; r < raidPtr->numSpare; r++) {
2519 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2520 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2521 rf_close_component(raidPtr, vp, acd);
2522 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2523 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2524 }
2525 }
2526
2527
2528 void
2529 rf_ReconThread(struct rf_recon_req *req)
2530 {
2531 int s;
2532 RF_Raid_t *raidPtr;
2533
2534 s = splbio();
2535 raidPtr = (RF_Raid_t *) req->raidPtr;
2536 raidPtr->recon_in_progress = 1;
2537
2538 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2539 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2540
2541 RF_Free(req, sizeof(*req));
2542
2543 raidPtr->recon_in_progress = 0;
2544 splx(s);
2545
2546 /* That's all... */
2547 kthread_exit(0); /* does not return */
2548 }
2549
2550 void
2551 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2552 {
2553 int retcode;
2554 int s;
2555
2556 raidPtr->parity_rewrite_stripes_done = 0;
2557 raidPtr->parity_rewrite_in_progress = 1;
2558 s = splbio();
2559 retcode = rf_RewriteParity(raidPtr);
2560 splx(s);
2561 if (retcode) {
2562 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2563 } else {
2564 /* set the clean bit! If we shutdown correctly,
2565 the clean bit on each component label will get
2566 set */
2567 raidPtr->parity_good = RF_RAID_CLEAN;
2568 }
2569 raidPtr->parity_rewrite_in_progress = 0;
2570
2571 /* Anyone waiting for us to stop? If so, inform them... */
2572 if (raidPtr->waitShutdown) {
2573 wakeup(&raidPtr->parity_rewrite_in_progress);
2574 }
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580
2581 void
2582 rf_CopybackThread(RF_Raid_t *raidPtr)
2583 {
2584 int s;
2585
2586 raidPtr->copyback_in_progress = 1;
2587 s = splbio();
2588 rf_CopybackReconstructedData(raidPtr);
2589 splx(s);
2590 raidPtr->copyback_in_progress = 0;
2591
2592 /* That's all... */
2593 kthread_exit(0); /* does not return */
2594 }
2595
2596
2597 void
2598 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2599 {
2600 int s;
2601 RF_Raid_t *raidPtr;
2602
2603 s = splbio();
2604 raidPtr = req->raidPtr;
2605 raidPtr->recon_in_progress = 1;
2606 rf_ReconstructInPlace(raidPtr, req->col);
2607 RF_Free(req, sizeof(*req));
2608 raidPtr->recon_in_progress = 0;
2609 splx(s);
2610
2611 /* That's all... */
2612 kthread_exit(0); /* does not return */
2613 }
2614
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
	/*
	 * Try to read a RAIDframe component label from (dev, vp).  If the
	 * label is reasonable and fits within 'size' sectors, prepend a
	 * new RF_AutoConfig_t to ac_list (the open vnode's ownership
	 * transfers to the list entry).  Otherwise the vnode is closed
	 * and released here.  Returns the (possibly extended) list, or
	 * NULL after freeing the entire list on memory exhaustion.
	 */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(dev, vp, clabel)) {
		/* Got the label. Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
#if DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: unusable component, release label and vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);
	}
	return ac_list;
}
2671
RF_AutoConfig_t *
rf_find_raid_components()
{
	/*
	 * Scan every disk-class device in the system for RAIDframe
	 * components.  Wedges (dk) are checked via their wedge info;
	 * other disks are checked partition-by-partition for FS_RAID
	 * partitions.  Returns a linked list of candidate components
	 * (built by rf_get_component()), or NULL if none were found.
	 */
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor, bminor, wedge;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		if (wedge) {
			/* wedge: ask the device for its wedge info and
			   accept it only if typed as RAIDframe */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED, 0);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", dv->dv_xname, error);
out:
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
				goto out;

			/* vnode ownership passes to rf_get_component() */
			ac_list = rf_get_component(ac_list, dev, vp,
			    dv->dv_xname, dkw.dkw_size);
			continue;
		}

		/* Ok, the disk exists. Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		if (error)
			continue;

		/* check each partition; only FS_RAID partitions are
		   candidate components */
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    dv->dv_xname, 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
			    label.d_partitions[i].p_size);
		}
	}
	return ac_list;
}
2806
2807
2808 static int
2809 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2810 {
2811
2812 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2813 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2814 ((clabel->clean == RF_RAID_CLEAN) ||
2815 (clabel->clean == RF_RAID_DIRTY)) &&
2816 clabel->row >=0 &&
2817 clabel->column >= 0 &&
2818 clabel->num_rows > 0 &&
2819 clabel->num_columns > 0 &&
2820 clabel->row < clabel->num_rows &&
2821 clabel->column < clabel->num_columns &&
2822 clabel->blockSize > 0 &&
2823 clabel->numBlocks > 0) {
2824 /* label looks reasonable enough... */
2825 return(1);
2826 }
2827 return(0);
2828 }
2829
2830
#if DEBUG
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Dump the interesting fields of a component label (debug only). */
	printf("	Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("	Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("	Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("	sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("	RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("	Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("	Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("	Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("	Config order: %d\n", clabel->config_order);
#endif

}
#endif
2858
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	/*
	 * Partition the flat list of discovered components into
	 * configuration sets: each set collects the components that
	 * (per rf_does_it_fit()) belong to the same array.  The input
	 * list is consumed — each RF_AutoConfig_t is relinked onto the
	 * 'ac' chain of its set.  Returns the list of sets.
	 */
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}
2924
2925 static int
2926 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2927 {
2928 RF_ComponentLabel_t *clabel1, *clabel2;
2929
2930 /* If this one matches the *first* one in the set, that's good
2931 enough, since the other members of the set would have been
2932 through here too... */
2933 /* note that we are not checking partitionSize here..
2934
2935 Note that we are also not checking the mod_counters here.
2936 If everything else matches execpt the mod_counter, that's
2937 good enough for this test. We will deal with the mod_counters
2938 a little later in the autoconfiguration process.
2939
2940 (clabel1->mod_counter == clabel2->mod_counter) &&
2941
2942 The reason we don't check for this is that failed disks
2943 will have lower modification counts. If those disks are
2944 not added to the set they used to belong to, then they will
2945 form their own set, which may result in 2 different sets,
2946 for example, competing to be configured at raid0, and
2947 perhaps competing to be the root filesystem set. If the
2948 wrong ones get configured, or both attempt to become /,
2949 weird behaviour and or serious lossage will occur. Thus we
2950 need to bring them into the fold here, and kick them out at
2951 a later point.
2952
2953 */
2954
2955 clabel1 = cset->ac->clabel;
2956 clabel2 = ac->clabel;
2957 if ((clabel1->version == clabel2->version) &&
2958 (clabel1->serial_number == clabel2->serial_number) &&
2959 (clabel1->num_rows == clabel2->num_rows) &&
2960 (clabel1->num_columns == clabel2->num_columns) &&
2961 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2962 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2963 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2964 (clabel1->parityConfig == clabel2->parityConfig) &&
2965 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2966 (clabel1->blockSize == clabel2->blockSize) &&
2967 (clabel1->numBlocks == clabel2->numBlocks) &&
2968 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2969 (clabel1->root_partition == clabel2->root_partition) &&
2970 (clabel1->last_unit == clabel2->last_unit) &&
2971 (clabel1->config_order == clabel2->config_order)) {
2972 /* if it get's here, it almost *has* to be a match */
2973 } else {
2974 /* it's not consistent with somebody in the set..
2975 punt */
2976 return(0);
2977 }
2978 /* all was fine.. it must fit... */
2979 return(1);
2980 }
2981
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	/*
	 * Decide whether this configuration set has enough live
	 * components to be configured.  The expected mod_counter is the
	 * maximum seen in the set; components with a stale counter are
	 * treated as missing.  RAID 1 gets special even/odd pair
	 * accounting; other levels use a simple missing-count limit.
	 * Returns 1 if configurable, 0 otherwise.
	 */
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (non-stale) component for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3084
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	/*
	 * Build an RF_Config_t from the component labels of an
	 * auto-detected set: common layout parameters come from the
	 * first component's label, and each component's device name is
	 * placed at its labelled column.  'config' is assumed to be
	 * zeroed by the caller.
	 */
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* record each component's device name at its own column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3119
3120 int
3121 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3122 {
3123 RF_ComponentLabel_t clabel;
3124 struct vnode *vp;
3125 dev_t dev;
3126 int column;
3127 int sparecol;
3128
3129 raidPtr->autoconfigure = new_value;
3130
3131 for(column=0; column<raidPtr->numCol; column++) {
3132 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3133 dev = raidPtr->Disks[column].dev;
3134 vp = raidPtr->raid_cinfo[column].ci_vp;
3135 raidread_component_label(dev, vp, &clabel);
3136 clabel.autoconfigure = new_value;
3137 raidwrite_component_label(dev, vp, &clabel);
3138 }
3139 }
3140 for(column = 0; column < raidPtr->numSpare ; column++) {
3141 sparecol = raidPtr->numCol + column;
3142 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3143 dev = raidPtr->Disks[sparecol].dev;
3144 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3145 raidread_component_label(dev, vp, &clabel);
3146 clabel.autoconfigure = new_value;
3147 raidwrite_component_label(dev, vp, &clabel);
3148 }
3149 }
3150 return(new_value);
3151 }
3152
3153 int
3154 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3155 {
3156 RF_ComponentLabel_t clabel;
3157 struct vnode *vp;
3158 dev_t dev;
3159 int column;
3160 int sparecol;
3161
3162 raidPtr->root_partition = new_value;
3163 for(column=0; column<raidPtr->numCol; column++) {
3164 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3165 dev = raidPtr->Disks[column].dev;
3166 vp = raidPtr->raid_cinfo[column].ci_vp;
3167 raidread_component_label(dev, vp, &clabel);
3168 clabel.root_partition = new_value;
3169 raidwrite_component_label(dev, vp, &clabel);
3170 }
3171 }
3172 for(column = 0; column < raidPtr->numSpare ; column++) {
3173 sparecol = raidPtr->numCol + column;
3174 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3175 dev = raidPtr->Disks[sparecol].dev;
3176 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3177 raidread_component_label(dev, vp, &clabel);
3178 clabel.root_partition = new_value;
3179 raidwrite_component_label(dev, vp, &clabel);
3180 }
3181 }
3182 return(new_value);
3183 }
3184
3185 void
3186 rf_release_all_vps(RF_ConfigSet_t *cset)
3187 {
3188 RF_AutoConfig_t *ac;
3189
3190 ac = cset->ac;
3191 while(ac!=NULL) {
3192 /* Close the vp, and give it back */
3193 if (ac->vp) {
3194 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3195 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3196 vput(ac->vp);
3197 ac->vp = NULL;
3198 }
3199 ac = ac->next;
3200 }
3201 }
3202
3203
3204 void
3205 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3206 {
3207 RF_AutoConfig_t *ac;
3208 RF_AutoConfig_t *next_ac;
3209
3210 ac = cset->ac;
3211 while(ac!=NULL) {
3212 next_ac = ac->next;
3213 /* nuke the label */
3214 free(ac->clabel, M_RAIDFRAME);
3215 /* cleanup the config structure */
3216 free(ac, M_RAIDFRAME);
3217 /* "next.." */
3218 ac = next_ac;
3219 }
3220 /* and, finally, nuke the config set */
3221 free(cset, M_RAIDFRAME);
3222 }
3223
3224
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	 * Fill in the array-wide fields of a component label from the
	 * current state of raidPtr.  Per-component fields (row/column)
	 * are left for the caller to set.
	 */
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3252
3253 int
3254 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3255 {
3256 RF_Raid_t *raidPtr;
3257 RF_Config_t *config;
3258 int raidID;
3259 int retcode;
3260
3261 #if DEBUG
3262 printf("RAID autoconfigure\n");
3263 #endif
3264
3265 retcode = 0;
3266 *unit = -1;
3267
3268 /* 1. Create a config structure */
3269
3270 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3271 M_RAIDFRAME,
3272 M_NOWAIT);
3273 if (config==NULL) {
3274 printf("Out of mem!?!?\n");
3275 /* XXX do something more intelligent here. */
3276 return(1);
3277 }
3278
3279 memset(config, 0, sizeof(RF_Config_t));
3280
3281 /*
3282 2. Figure out what RAID ID this one is supposed to live at
3283 See if we can get the same RAID dev that it was configured
3284 on last time..
3285 */
3286
3287 raidID = cset->ac->clabel->last_unit;
3288 if ((raidID < 0) || (raidID >= numraid)) {
3289 /* let's not wander off into lala land. */
3290 raidID = numraid - 1;
3291 }
3292 if (raidPtrs[raidID]->valid != 0) {
3293
3294 /*
3295 Nope... Go looking for an alternative...
3296 Start high so we don't immediately use raid0 if that's
3297 not taken.
3298 */
3299
3300 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3301 if (raidPtrs[raidID]->valid == 0) {
3302 /* can use this one! */
3303 break;
3304 }
3305 }
3306 }
3307
3308 if (raidID < 0) {
3309 /* punt... */
3310 printf("Unable to auto configure this set!\n");
3311 printf("(Out of RAID devs!)\n");
3312 free(config, M_RAIDFRAME);
3313 return(1);
3314 }
3315
3316 #if DEBUG
3317 printf("Configuring raid%d:\n",raidID);
3318 #endif
3319
3320 raidPtr = raidPtrs[raidID];
3321
3322 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3323 raidPtr->raidid = raidID;
3324 raidPtr->openings = RAIDOUTSTANDING;
3325
3326 /* 3. Build the configuration structure */
3327 rf_create_configuration(cset->ac, config, raidPtr);
3328
3329 /* 4. Do the configuration */
3330 retcode = rf_Configure(raidPtr, config, cset->ac);
3331
3332 if (retcode == 0) {
3333
3334 raidinit(raidPtrs[raidID]);
3335
3336 rf_markalldirty(raidPtrs[raidID]);
3337 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3338 if (cset->ac->clabel->root_partition==1) {
3339 /* everything configured just fine. Make a note
3340 that this set is eligible to be root. */
3341 cset->rootable = 1;
3342 /* XXX do this here? */
3343 raidPtrs[raidID]->root_partition = 1;
3344 }
3345 }
3346
3347 /* 5. Cleanup */
3348 free(config, M_RAIDFRAME);
3349
3350 *unit = raidID;
3351 return(retcode);
3352 }
3353
3354 void
3355 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3356 {
3357 struct buf *bp;
3358
3359 bp = (struct buf *)desc->bp;
3360 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3361 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3362 }
3363
/*
 * Initialize a pool with RAIDframe's standard watermarks: cap it at
 * xmax items, pre-allocate xmin, and keep at least xmin in reserve.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	/* no alignment constraint, no flags, default backend allocator */
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3373
3374 /*
3375 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3376 * if there is IO pending and if that IO could possibly be done for a
3377 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3378 * otherwise.
3379 *
3380 */
3381
3382 int
3383 rf_buf_queue_check(int raidid)
3384 {
3385 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3386 raidPtrs[raidid]->openings > 0) {
3387 /* there is work to do */
3388 return 0;
3389 }
3390 /* default is nothing to do */
3391 return 1;
3392 }
3393
3394 int
3395 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3396 {
3397 struct partinfo dpart;
3398 struct dkwedge_info dkw;
3399 int error;
3400
3401 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3402 if (error == 0) {
3403 diskPtr->blockSize = dpart.disklab->d_secsize;
3404 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3405 diskPtr->partitionSize = dpart.part->p_size;
3406 return 0;
3407 }
3408
3409 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3410 if (error == 0) {
3411 diskPtr->blockSize = 512; /* XXX */
3412 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3413 diskPtr->partitionSize = dkw.dkw_size;
3414 return 0;
3415 }
3416 return error;
3417 }
3418
/* Pseudo-device autoconf match: always succeeds. */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{
	return (1);
}
3425
/*
 * Pseudo-device autoconf attach: intentionally empty -- the real
 * per-unit setup is done elsewhere (at configuration time).
 */
static void
raid_attach(struct device *parent, struct device *self,
	    void *aux)
{

}
3432
3433
3434 static int
3435 raid_detach(struct device *self, int flags)
3436 {
3437 struct raid_softc *rs = (struct raid_softc *)self;
3438
3439 if (rs->sc_flags & RAIDF_INITED)
3440 return EBUSY;
3441
3442 return 0;
3443 }
3444
3445
3446