rf_netbsdkintf.c revision 1.199 1 /* $NetBSD: rf_netbsdkintf.c,v 1.199 2006/01/08 22:26:30 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.199 2006/01/08 22:26:30 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
190 #ifdef DEBUG
191 int rf_kdebug_level = 0;
192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
193 #else /* DEBUG */
194 #define db1_printf(a) { }
195 #endif /* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
236 /* XXX Not sure if the following should be replacing the raidPtrs above,
237 or if it should be used in conjunction with that...
238 */
239
/*
 * Per-unit software state for a RAID pseudo-disk.  One entry per
 * configured unit lives in the raid_softc[] array, indexed by the
 * unit number extracted from the device minor (see raidunit()).
 */
struct raid_softc {
	int     sc_flags;		/* flags (RAIDF_* below) */
	int     sc_cflags;		/* configuration flags */
	size_t  sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 int numraid = 0;
257
258 /*
259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
260 * Be aware that large numbers can allow the driver to consume a lot of
261 * kernel memory, especially on writes, and in degraded mode reads.
262 *
263 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
264 * a single 64K write will typically require 64K for the old data,
265 * 64K for the old parity, and 64K for the new parity, for a total
266 * of 192K (if the parity buffer is not re-used immediately).
267 * Even it if is used immediately, that's still 128K, which when multiplied
268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
269 *
270 * Now in degraded mode, for example, a 64K read on the above setup may
271 * require data reconstruction, which will require *all* of the 4 remaining
272 * disks to participate -- 4 * 32K/disk == 128K again.
273 */
274
275 #ifndef RAIDOUTSTANDING
276 #define RAIDOUTSTANDING 6
277 #endif
278
279 #define RAIDLABELDEV(dev) \
280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
281
282 /* declared here, and made public, for the benefit of KVM stuff.. */
283 struct raid_softc *raid_softc;
284
285 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
286 struct disklabel *);
287 static void raidgetdisklabel(dev_t);
288 static void raidmakedisklabel(struct raid_softc *);
289
290 static int raidlock(struct raid_softc *);
291 static void raidunlock(struct raid_softc *);
292
293 static void rf_markalldirty(RF_Raid_t *);
294
295 struct device *raidrootdev;
296
297 void rf_ReconThread(struct rf_recon_req *);
298 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
299 void rf_CopybackThread(RF_Raid_t *raidPtr);
300 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
301 int rf_autoconfig(struct device *self);
302 void rf_buildroothack(RF_ConfigSet_t *);
303
304 RF_AutoConfig_t *rf_find_raid_components(void);
305 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
306 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
307 static int rf_reasonable_label(RF_ComponentLabel_t *);
308 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
309 int rf_set_autoconfig(RF_Raid_t *, int);
310 int rf_set_rootpartition(RF_Raid_t *, int);
311 void rf_release_all_vps(RF_ConfigSet_t *);
312 void rf_cleanup_config_set(RF_ConfigSet_t *);
313 int rf_have_enough_components(RF_ConfigSet_t *);
314 int rf_auto_config_set(RF_ConfigSet_t *, int *);
315
316 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
317 allow autoconfig to take place.
318 Note that this is overridden by having
319 RAID_AUTOCONFIG as an option in the
320 kernel config file. */
321
322 struct RF_Pools_s rf_pools;
323
324 void
325 raidattach(int num)
326 {
327 int raidID;
328 int i, rc;
329
330 #ifdef DEBUG
331 printf("raidattach: Asked for %d units\n", num);
332 #endif
333
334 if (num <= 0) {
335 #ifdef DIAGNOSTIC
336 panic("raidattach: count <= 0");
337 #endif
338 return;
339 }
340 /* This is where all the initialization stuff gets done. */
341
342 numraid = num;
343
344 /* Make some space for requested number of units... */
345
346 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
347 if (raidPtrs == NULL) {
348 panic("raidPtrs is NULL!!");
349 }
350
351 rf_mutex_init(&rf_sparet_wait_mutex);
352
353 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
354
355 for (i = 0; i < num; i++)
356 raidPtrs[i] = NULL;
357 rc = rf_BootRaidframe();
358 if (rc == 0)
359 printf("Kernelized RAIDframe activated\n");
360 else
361 panic("Serious error booting RAID!!");
362
363 /* put together some datastructures like the CCD device does.. This
364 * lets us lock the device and what-not when it gets opened. */
365
366 raid_softc = (struct raid_softc *)
367 malloc(num * sizeof(struct raid_softc),
368 M_RAIDFRAME, M_NOWAIT);
369 if (raid_softc == NULL) {
370 printf("WARNING: no memory for RAIDframe driver\n");
371 return;
372 }
373
374 memset(raid_softc, 0, num * sizeof(struct raid_softc));
375
376 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
377 M_RAIDFRAME, M_NOWAIT);
378 if (raidrootdev == NULL) {
379 panic("No memory for RAIDframe driver!!?!?!");
380 }
381
382 for (raidID = 0; raidID < num; raidID++) {
383 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
384 pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
385
386 raidrootdev[raidID].dv_class = DV_DISK;
387 raidrootdev[raidID].dv_cfdata = NULL;
388 raidrootdev[raidID].dv_unit = raidID;
389 raidrootdev[raidID].dv_parent = NULL;
390 raidrootdev[raidID].dv_flags = 0;
391 snprintf(raidrootdev[raidID].dv_xname,
392 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
393
394 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
395 (RF_Raid_t *));
396 if (raidPtrs[raidID] == NULL) {
397 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
398 numraid = raidID;
399 return;
400 }
401 }
402
403 #ifdef RAID_AUTOCONFIG
404 raidautoconfig = 1;
405 #endif
406
407 /*
408 * Register a finalizer which will be used to auto-config RAID
409 * sets once all real hardware devices have been found.
410 */
411 if (config_finalize_register(NULL, rf_autoconfig) != 0)
412 printf("WARNING: unable to register RAIDframe finalizer\n");
413 }
414
415 int
416 rf_autoconfig(struct device *self)
417 {
418 RF_AutoConfig_t *ac_list;
419 RF_ConfigSet_t *config_sets;
420
421 if (raidautoconfig == 0)
422 return (0);
423
424 /* XXX This code can only be run once. */
425 raidautoconfig = 0;
426
427 /* 1. locate all RAID components on the system */
428 #ifdef DEBUG
429 printf("Searching for RAID components...\n");
430 #endif
431 ac_list = rf_find_raid_components();
432
433 /* 2. Sort them into their respective sets. */
434 config_sets = rf_create_auto_sets(ac_list);
435
436 /*
437 * 3. Evaluate each set andconfigure the valid ones.
438 * This gets done in rf_buildroothack().
439 */
440 rf_buildroothack(config_sets);
441
442 return (1);
443 }
444
445 void
446 rf_buildroothack(RF_ConfigSet_t *config_sets)
447 {
448 RF_ConfigSet_t *cset;
449 RF_ConfigSet_t *next_cset;
450 int retcode;
451 int raidID;
452 int rootID;
453 int num_root;
454
455 rootID = 0;
456 num_root = 0;
457 cset = config_sets;
458 while(cset != NULL ) {
459 next_cset = cset->next;
460 if (rf_have_enough_components(cset) &&
461 cset->ac->clabel->autoconfigure==1) {
462 retcode = rf_auto_config_set(cset,&raidID);
463 if (!retcode) {
464 if (cset->rootable) {
465 rootID = raidID;
466 num_root++;
467 }
468 } else {
469 /* The autoconfig didn't work :( */
470 #if DEBUG
471 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
472 #endif
473 rf_release_all_vps(cset);
474 }
475 } else {
476 /* we're not autoconfiguring this set...
477 release the associated resources */
478 rf_release_all_vps(cset);
479 }
480 /* cleanup */
481 rf_cleanup_config_set(cset);
482 cset = next_cset;
483 }
484
485 /* we found something bootable... */
486
487 if (num_root == 1) {
488 booted_device = &raidrootdev[rootID];
489 } else if (num_root > 1) {
490 /* we can't guess.. require the user to answer... */
491 boothowto |= RB_ASKNAME;
492 }
493 }
494
495
496 int
497 raidsize(dev_t dev)
498 {
499 struct raid_softc *rs;
500 struct disklabel *lp;
501 int part, unit, omask, size;
502
503 unit = raidunit(dev);
504 if (unit >= numraid)
505 return (-1);
506 rs = &raid_softc[unit];
507
508 if ((rs->sc_flags & RAIDF_INITED) == 0)
509 return (-1);
510
511 part = DISKPART(dev);
512 omask = rs->sc_dkdev.dk_openmask & (1 << part);
513 lp = rs->sc_dkdev.dk_label;
514
515 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
516 return (-1);
517
518 if (lp->d_partitions[part].p_fstype != FS_SWAP)
519 size = -1;
520 else
521 size = lp->d_partitions[part].p_size *
522 (lp->d_secsize / DEV_BSIZE);
523
524 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
525 return (-1);
526
527 return (size);
528
529 }
530
/*
 * raiddump:
 *	Crash-dump entry point for the raid block device.  Dumping to
 *	a RAID set is not supported; always fails with ENXIO.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	/* Not implemented. */
	return ENXIO;
}
537 /* ARGSUSED */
/*
 * raidopen:
 *	Open a partition of a RAID unit.  Under the unit lock: re-reads
 *	the disklabel on first open of a configured set, validates that
 *	the requested partition exists, records the open in the char/
 *	block open masks (which prevents unconfiguration while open),
 *	and marks all components dirty on the very first open.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against close/unconfigure of this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First open of a configured set: refresh the in-core label
	   before the partition checks below consult it. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open value here, so this test
	   fires only on the transition from fully-closed to open. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Fold the per-mode masks into the combined open mask. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
607 /* ARGSUSED */
/*
 * raidclose:
 *	Close a partition of a RAID unit.  Under the unit lock: clears
 *	the partition from the relevant open mask; on last close of a
 *	configured set, marks the component labels clean, and -- if the
 *	system is shutting down -- shuts the RAID set down and detaches
 *	the pseudo-disk.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against open/unconfigure of this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	/* NOTE(review): any rf_Shutdown() error above is stored in
	   'error' but not returned -- presumably close is meant to
	   always succeed during shutdown; confirm. */
	return (0);

}
664
665 void
666 raidstrategy(struct buf *bp)
667 {
668 int s;
669
670 unsigned int raidID = raidunit(bp->b_dev);
671 RF_Raid_t *raidPtr;
672 struct raid_softc *rs = &raid_softc[raidID];
673 int wlabel;
674
675 if ((rs->sc_flags & RAIDF_INITED) ==0) {
676 bp->b_error = ENXIO;
677 bp->b_flags |= B_ERROR;
678 goto done;
679 }
680 if (raidID >= numraid || !raidPtrs[raidID]) {
681 bp->b_error = ENODEV;
682 bp->b_flags |= B_ERROR;
683 goto done;
684 }
685 raidPtr = raidPtrs[raidID];
686 if (!raidPtr->valid) {
687 bp->b_error = ENODEV;
688 bp->b_flags |= B_ERROR;
689 goto done;
690 }
691 if (bp->b_bcount == 0) {
692 db1_printf(("b_bcount is zero..\n"));
693 goto done;
694 }
695
696 /*
697 * Do bounds checking and adjust transfer. If there's an
698 * error, the bounds check will flag that for us.
699 */
700
701 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
702 if (DISKPART(bp->b_dev) == RAW_PART) {
703 uint64_t size; /* device size in DEV_BSIZE unit */
704
705 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
706 size = raidPtr->totalSectors <<
707 (raidPtr->logBytesPerSector - DEV_BSHIFT);
708 } else {
709 size = raidPtr->totalSectors >>
710 (DEV_BSHIFT - raidPtr->logBytesPerSector);
711 }
712 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
713 goto done;
714 }
715 } else {
716 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
717 db1_printf(("Bounds check failed!!:%d %d\n",
718 (int) bp->b_blkno, (int) wlabel));
719 goto done;
720 }
721 }
722 s = splbio();
723
724 bp->b_resid = 0;
725
726 /* stuff it onto our queue */
727 BUFQ_PUT(rs->buf_queue, bp);
728
729 /* scheduled the IO to happen at the next convenient time */
730 wakeup(&(raidPtrs[raidID]->iodone));
731
732 splx(s);
733 return;
734
735 done:
736 bp->b_resid = bp->b_bcount;
737 biodone(bp);
738 }
739 /* ARGSUSED */
740 int
741 raidread(dev_t dev, struct uio *uio, int flags)
742 {
743 int unit = raidunit(dev);
744 struct raid_softc *rs;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752
753 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
754
755 }
756 /* ARGSUSED */
757 int
758 raidwrite(dev_t dev, struct uio *uio, int flags)
759 {
760 int unit = raidunit(dev);
761 struct raid_softc *rs;
762
763 if (unit >= numraid)
764 return (ENXIO);
765 rs = &raid_softc[unit];
766
767 if ((rs->sc_flags & RAIDF_INITED) == 0)
768 return (ENXIO);
769
770 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
771
772 }
773
774 int
775 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
776 {
777 int unit = raidunit(dev);
778 int error = 0;
779 int part, pmask;
780 struct raid_softc *rs;
781 RF_Config_t *k_cfg, *u_cfg;
782 RF_Raid_t *raidPtr;
783 RF_RaidDisk_t *diskPtr;
784 RF_AccTotals_t *totals;
785 RF_DeviceConfig_t *d_cfg, **ucfgp;
786 u_char *specific_buf;
787 int retcode = 0;
788 int column;
789 int raidid;
790 struct rf_recon_req *rrcopy, *rr;
791 RF_ComponentLabel_t *clabel;
792 RF_ComponentLabel_t ci_label;
793 RF_ComponentLabel_t **clabel_ptr;
794 RF_SingleComponent_t *sparePtr,*componentPtr;
795 RF_SingleComponent_t hot_spare;
796 RF_SingleComponent_t component;
797 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
798 int i, j, d;
799 #ifdef __HAVE_OLD_DISKLABEL
800 struct disklabel newlabel;
801 #endif
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 #ifdef __HAVE_OLD_DISKLABEL
816 case ODIOCWDINFO:
817 case ODIOCSDINFO:
818 #endif
819 case DIOCWLABEL:
820 if ((flag & FWRITE) == 0)
821 return (EBADF);
822 }
823
824 /* Must be initialized for these... */
825 switch (cmd) {
826 case DIOCGDINFO:
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCGDINFO:
831 case ODIOCWDINFO:
832 case ODIOCSDINFO:
833 case ODIOCGDEFLABEL:
834 #endif
835 case DIOCGPART:
836 case DIOCWLABEL:
837 case DIOCGDEFLABEL:
838 case RAIDFRAME_SHUTDOWN:
839 case RAIDFRAME_REWRITEPARITY:
840 case RAIDFRAME_GET_INFO:
841 case RAIDFRAME_RESET_ACCTOTALS:
842 case RAIDFRAME_GET_ACCTOTALS:
843 case RAIDFRAME_KEEP_ACCTOTALS:
844 case RAIDFRAME_GET_SIZE:
845 case RAIDFRAME_FAIL_DISK:
846 case RAIDFRAME_COPYBACK:
847 case RAIDFRAME_CHECK_RECON_STATUS:
848 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
860 case RAIDFRAME_SET_AUTOCONFIG:
861 case RAIDFRAME_SET_ROOT:
862 case RAIDFRAME_DELETE_COMPONENT:
863 case RAIDFRAME_INCORPORATE_HOT_SPARE:
864 if ((rs->sc_flags & RAIDF_INITED) == 0)
865 return (ENXIO);
866 }
867
868 switch (cmd) {
869
870 /* configure the system */
871 case RAIDFRAME_CONFIGURE:
872
873 if (raidPtr->valid) {
874 /* There is a valid RAID set running on this unit! */
875 printf("raid%d: Device already configured!\n",unit);
876 return(EINVAL);
877 }
878
879 /* copy-in the configuration information */
880 /* data points to a pointer to the configuration structure */
881
882 u_cfg = *((RF_Config_t **) data);
883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
884 if (k_cfg == NULL) {
885 return (ENOMEM);
886 }
887 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
888 if (retcode) {
889 RF_Free(k_cfg, sizeof(RF_Config_t));
890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
891 retcode));
892 return (retcode);
893 }
894 /* allocate a buffer for the layout-specific data, and copy it
895 * in */
896 if (k_cfg->layoutSpecificSize) {
897 if (k_cfg->layoutSpecificSize > 10000) {
898 /* sanity check */
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 return (EINVAL);
901 }
902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
903 (u_char *));
904 if (specific_buf == NULL) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (ENOMEM);
907 }
908 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
909 k_cfg->layoutSpecificSize);
910 if (retcode) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 RF_Free(specific_buf,
913 k_cfg->layoutSpecificSize);
914 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
915 retcode));
916 return (retcode);
917 }
918 } else
919 specific_buf = NULL;
920 k_cfg->layoutSpecific = specific_buf;
921
922 /* should do some kind of sanity check on the configuration.
923 * Store the sum of all the bytes in the last byte? */
924
925 /* configure the system */
926
927 /*
928 * Clear the entire RAID descriptor, just to make sure
929 * there is no stale data left in the case of a
930 * reconfiguration
931 */
932 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
933 raidPtr->raidid = unit;
934
935 retcode = rf_Configure(raidPtr, k_cfg, NULL);
936
937 if (retcode == 0) {
938
939 /* allow this many simultaneous IO's to
940 this RAID device */
941 raidPtr->openings = RAIDOUTSTANDING;
942
943 raidinit(raidPtr);
944 rf_markalldirty(raidPtr);
945 }
946 /* free the buffers. No return code here. */
947 if (k_cfg->layoutSpecificSize) {
948 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
949 }
950 RF_Free(k_cfg, sizeof(RF_Config_t));
951
952 return (retcode);
953
954 /* shutdown the system */
955 case RAIDFRAME_SHUTDOWN:
956
957 if ((error = raidlock(rs)) != 0)
958 return (error);
959
960 /*
961 * If somebody has a partition mounted, we shouldn't
962 * shutdown.
963 */
964
965 part = DISKPART(dev);
966 pmask = (1 << part);
967 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
968 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
969 (rs->sc_dkdev.dk_copenmask & pmask))) {
970 raidunlock(rs);
971 return (EBUSY);
972 }
973
974 retcode = rf_Shutdown(raidPtr);
975
976 /* It's no longer initialized... */
977 rs->sc_flags &= ~RAIDF_INITED;
978
979 /* Detach the disk. */
980 pseudo_disk_detach(&rs->sc_dkdev);
981
982 raidunlock(rs);
983
984 return (retcode);
985 case RAIDFRAME_GET_COMPONENT_LABEL:
986 clabel_ptr = (RF_ComponentLabel_t **) data;
987 /* need to read the component label for the disk indicated
988 by row,column in clabel */
989
990 /* For practice, let's get it directly fromdisk, rather
991 than from the in-core copy */
992 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
993 (RF_ComponentLabel_t *));
994 if (clabel == NULL)
995 return (ENOMEM);
996
997 retcode = copyin( *clabel_ptr, clabel,
998 sizeof(RF_ComponentLabel_t));
999
1000 if (retcode) {
1001 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1002 return(retcode);
1003 }
1004
1005 clabel->row = 0; /* Don't allow looking at anything else.*/
1006
1007 column = clabel->column;
1008
1009 if ((column < 0) || (column >= raidPtr->numCol +
1010 raidPtr->numSpare)) {
1011 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1012 return(EINVAL);
1013 }
1014
1015 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1016 raidPtr->raid_cinfo[column].ci_vp,
1017 clabel );
1018
1019 if (retcode == 0) {
1020 retcode = copyout(clabel, *clabel_ptr,
1021 sizeof(RF_ComponentLabel_t));
1022 }
1023 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1024 return (retcode);
1025
1026 case RAIDFRAME_SET_COMPONENT_LABEL:
1027 clabel = (RF_ComponentLabel_t *) data;
1028
1029 /* XXX check the label for valid stuff... */
1030 /* Note that some things *should not* get modified --
1031 the user should be re-initing the labels instead of
1032 trying to patch things.
1033 */
1034
1035 raidid = raidPtr->raidid;
1036 #if DEBUG
1037 printf("raid%d: Got component label:\n", raidid);
1038 printf("raid%d: Version: %d\n", raidid, clabel->version);
1039 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1040 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1041 printf("raid%d: Column: %d\n", raidid, clabel->column);
1042 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1043 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1044 printf("raid%d: Status: %d\n", raidid, clabel->status);
1045 #endif
1046 clabel->row = 0;
1047 column = clabel->column;
1048
1049 if ((column < 0) || (column >= raidPtr->numCol)) {
1050 return(EINVAL);
1051 }
1052
1053 /* XXX this isn't allowed to do anything for now :-) */
1054
1055 /* XXX and before it is, we need to fill in the rest
1056 of the fields!?!?!?! */
1057 #if 0
1058 raidwrite_component_label(
1059 raidPtr->Disks[column].dev,
1060 raidPtr->raid_cinfo[column].ci_vp,
1061 clabel );
1062 #endif
1063 return (0);
1064
1065 case RAIDFRAME_INIT_LABELS:
1066 clabel = (RF_ComponentLabel_t *) data;
1067 /*
1068 we only want the serial number from
1069 the above. We get all the rest of the information
1070 from the config that was used to create this RAID
1071 set.
1072 */
1073
1074 raidPtr->serial_number = clabel->serial_number;
1075
1076 raid_init_component_label(raidPtr, &ci_label);
1077 ci_label.serial_number = clabel->serial_number;
1078 ci_label.row = 0; /* we dont' pretend to support more */
1079
1080 for(column=0;column<raidPtr->numCol;column++) {
1081 diskPtr = &raidPtr->Disks[column];
1082 if (!RF_DEAD_DISK(diskPtr->status)) {
1083 ci_label.partitionSize = diskPtr->partitionSize;
1084 ci_label.column = column;
1085 raidwrite_component_label(
1086 raidPtr->Disks[column].dev,
1087 raidPtr->raid_cinfo[column].ci_vp,
1088 &ci_label );
1089 }
1090 }
1091
1092 return (retcode);
1093 case RAIDFRAME_SET_AUTOCONFIG:
1094 d = rf_set_autoconfig(raidPtr, *(int *) data);
1095 printf("raid%d: New autoconfig value is: %d\n",
1096 raidPtr->raidid, d);
1097 *(int *) data = d;
1098 return (retcode);
1099
1100 case RAIDFRAME_SET_ROOT:
1101 d = rf_set_rootpartition(raidPtr, *(int *) data);
1102 printf("raid%d: New rootpartition value is: %d\n",
1103 raidPtr->raidid, d);
1104 *(int *) data = d;
1105 return (retcode);
1106
1107 /* initialize all parity */
1108 case RAIDFRAME_REWRITEPARITY:
1109
1110 if (raidPtr->Layout.map->faultsTolerated == 0) {
1111 /* Parity for RAID 0 is trivially correct */
1112 raidPtr->parity_good = RF_RAID_CLEAN;
1113 return(0);
1114 }
1115
1116 if (raidPtr->parity_rewrite_in_progress == 1) {
1117 /* Re-write is already in progress! */
1118 return(EINVAL);
1119 }
1120
1121 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1122 rf_RewriteParityThread,
1123 raidPtr,"raid_parity");
1124 return (retcode);
1125
1126
1127 case RAIDFRAME_ADD_HOT_SPARE:
1128 sparePtr = (RF_SingleComponent_t *) data;
1129 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1130 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1131 return(retcode);
1132
1133 case RAIDFRAME_REMOVE_HOT_SPARE:
1134 return(retcode);
1135
1136 case RAIDFRAME_DELETE_COMPONENT:
1137 componentPtr = (RF_SingleComponent_t *)data;
1138 memcpy( &component, componentPtr,
1139 sizeof(RF_SingleComponent_t));
1140 retcode = rf_delete_component(raidPtr, &component);
1141 return(retcode);
1142
1143 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_REBUILD_IN_PLACE:
1151
1152 if (raidPtr->Layout.map->faultsTolerated == 0) {
1153 /* Can't do this on a RAID 0!! */
1154 return(EINVAL);
1155 }
1156
1157 if (raidPtr->recon_in_progress == 1) {
1158 /* a reconstruct is already in progress! */
1159 return(EINVAL);
1160 }
1161
1162 componentPtr = (RF_SingleComponent_t *) data;
1163 memcpy( &component, componentPtr,
1164 sizeof(RF_SingleComponent_t));
1165 component.row = 0; /* we don't support any more */
1166 column = component.column;
1167
1168 if ((column < 0) || (column >= raidPtr->numCol)) {
1169 return(EINVAL);
1170 }
1171
1172 RF_LOCK_MUTEX(raidPtr->mutex);
1173 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1174 (raidPtr->numFailures > 0)) {
1175 /* XXX 0 above shouldn't be constant!!! */
1176 /* some component other than this has failed.
1177 Let's not make things worse than they already
1178 are... */
1179 printf("raid%d: Unable to reconstruct to disk at:\n",
1180 raidPtr->raidid);
1181 printf("raid%d: Col: %d Too many failures.\n",
1182 raidPtr->raidid, column);
1183 RF_UNLOCK_MUTEX(raidPtr->mutex);
1184 return (EINVAL);
1185 }
1186 if (raidPtr->Disks[column].status ==
1187 rf_ds_reconstructing) {
1188 printf("raid%d: Unable to reconstruct to disk at:\n",
1189 raidPtr->raidid);
1190 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1191
1192 RF_UNLOCK_MUTEX(raidPtr->mutex);
1193 return (EINVAL);
1194 }
1195 if (raidPtr->Disks[column].status == rf_ds_spared) {
1196 RF_UNLOCK_MUTEX(raidPtr->mutex);
1197 return (EINVAL);
1198 }
1199 RF_UNLOCK_MUTEX(raidPtr->mutex);
1200
1201 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1202 if (rrcopy == NULL)
1203 return(ENOMEM);
1204
1205 rrcopy->raidPtr = (void *) raidPtr;
1206 rrcopy->col = column;
1207
1208 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1209 rf_ReconstructInPlaceThread,
1210 rrcopy,"raid_reconip");
1211 return(retcode);
1212
1213 case RAIDFRAME_GET_INFO:
1214 if (!raidPtr->valid)
1215 return (ENODEV);
1216 ucfgp = (RF_DeviceConfig_t **) data;
1217 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1218 (RF_DeviceConfig_t *));
1219 if (d_cfg == NULL)
1220 return (ENOMEM);
1221 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1222 d_cfg->rows = 1; /* there is only 1 row now */
1223 d_cfg->cols = raidPtr->numCol;
1224 d_cfg->ndevs = raidPtr->numCol;
1225 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1226 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1227 return (ENOMEM);
1228 }
1229 d_cfg->nspares = raidPtr->numSpare;
1230 if (d_cfg->nspares >= RF_MAX_DISKS) {
1231 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1232 return (ENOMEM);
1233 }
1234 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1235 d = 0;
1236 for (j = 0; j < d_cfg->cols; j++) {
1237 d_cfg->devs[d] = raidPtr->Disks[j];
1238 d++;
1239 }
1240 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1241 d_cfg->spares[i] = raidPtr->Disks[j];
1242 }
1243 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1244 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1245
1246 return (retcode);
1247
1248 case RAIDFRAME_CHECK_PARITY:
1249 *(int *) data = raidPtr->parity_good;
1250 return (0);
1251
1252 case RAIDFRAME_RESET_ACCTOTALS:
1253 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1254 return (0);
1255
1256 case RAIDFRAME_GET_ACCTOTALS:
1257 totals = (RF_AccTotals_t *) data;
1258 *totals = raidPtr->acc_totals;
1259 return (0);
1260
1261 case RAIDFRAME_KEEP_ACCTOTALS:
1262 raidPtr->keep_acc_totals = *(int *)data;
1263 return (0);
1264
1265 case RAIDFRAME_GET_SIZE:
1266 *(int *) data = raidPtr->totalSectors;
1267 return (0);
1268
1269 /* fail a disk & optionally start reconstruction */
1270 case RAIDFRAME_FAIL_DISK:
1271
1272 if (raidPtr->Layout.map->faultsTolerated == 0) {
1273 /* Can't do this on a RAID 0!! */
1274 return(EINVAL);
1275 }
1276
1277 rr = (struct rf_recon_req *) data;
1278 rr->row = 0;
1279 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1280 return (EINVAL);
1281
1282
1283 RF_LOCK_MUTEX(raidPtr->mutex);
1284 if (raidPtr->status == rf_rs_reconstructing) {
1285 /* you can't fail a disk while we're reconstructing! */
1286 /* XXX wrong for RAID6 */
1287 RF_UNLOCK_MUTEX(raidPtr->mutex);
1288 return (EINVAL);
1289 }
1290 if ((raidPtr->Disks[rr->col].status ==
1291 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1292 /* some other component has failed. Let's not make
1293 things worse. XXX wrong for RAID6 */
1294 RF_UNLOCK_MUTEX(raidPtr->mutex);
1295 return (EINVAL);
1296 }
1297 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1298 /* Can't fail a spared disk! */
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300 return (EINVAL);
1301 }
1302 RF_UNLOCK_MUTEX(raidPtr->mutex);
1303
1304 /* make a copy of the recon request so that we don't rely on
1305 * the user's buffer */
1306 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1307 if (rrcopy == NULL)
1308 return(ENOMEM);
1309 memcpy(rrcopy, rr, sizeof(*rr));
1310 rrcopy->raidPtr = (void *) raidPtr;
1311
1312 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1313 rf_ReconThread,
1314 rrcopy,"raid_recon");
1315 return (0);
1316
1317 /* invoke a copyback operation after recon on whatever disk
1318 * needs it, if any */
1319 case RAIDFRAME_COPYBACK:
1320
1321 if (raidPtr->Layout.map->faultsTolerated == 0) {
1322 /* This makes no sense on a RAID 0!! */
1323 return(EINVAL);
1324 }
1325
1326 if (raidPtr->copyback_in_progress == 1) {
1327 /* Copyback is already in progress! */
1328 return(EINVAL);
1329 }
1330
1331 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1332 rf_CopybackThread,
1333 raidPtr,"raid_copyback");
1334 return (retcode);
1335
1336 /* return the percentage completion of reconstruction */
1337 case RAIDFRAME_CHECK_RECON_STATUS:
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* This makes no sense on a RAID 0, so tell the
1340 user it's done. */
1341 *(int *) data = 100;
1342 return(0);
1343 }
1344 if (raidPtr->status != rf_rs_reconstructing)
1345 *(int *) data = 100;
1346 else {
1347 if (raidPtr->reconControl->numRUsTotal > 0) {
1348 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1349 } else {
1350 *(int *) data = 0;
1351 }
1352 }
1353 return (0);
1354 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1355 progressInfoPtr = (RF_ProgressInfo_t **) data;
1356 if (raidPtr->status != rf_rs_reconstructing) {
1357 progressInfo.remaining = 0;
1358 progressInfo.completed = 100;
1359 progressInfo.total = 100;
1360 } else {
1361 progressInfo.total =
1362 raidPtr->reconControl->numRUsTotal;
1363 progressInfo.completed =
1364 raidPtr->reconControl->numRUsComplete;
1365 progressInfo.remaining = progressInfo.total -
1366 progressInfo.completed;
1367 }
1368 retcode = copyout(&progressInfo, *progressInfoPtr,
1369 sizeof(RF_ProgressInfo_t));
1370 return (retcode);
1371
1372 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1373 if (raidPtr->Layout.map->faultsTolerated == 0) {
1374 /* This makes no sense on a RAID 0, so tell the
1375 user it's done. */
1376 *(int *) data = 100;
1377 return(0);
1378 }
1379 if (raidPtr->parity_rewrite_in_progress == 1) {
1380 *(int *) data = 100 *
1381 raidPtr->parity_rewrite_stripes_done /
1382 raidPtr->Layout.numStripe;
1383 } else {
1384 *(int *) data = 100;
1385 }
1386 return (0);
1387
1388 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1389 progressInfoPtr = (RF_ProgressInfo_t **) data;
1390 if (raidPtr->parity_rewrite_in_progress == 1) {
1391 progressInfo.total = raidPtr->Layout.numStripe;
1392 progressInfo.completed =
1393 raidPtr->parity_rewrite_stripes_done;
1394 progressInfo.remaining = progressInfo.total -
1395 progressInfo.completed;
1396 } else {
1397 progressInfo.remaining = 0;
1398 progressInfo.completed = 100;
1399 progressInfo.total = 100;
1400 }
1401 retcode = copyout(&progressInfo, *progressInfoPtr,
1402 sizeof(RF_ProgressInfo_t));
1403 return (retcode);
1404
1405 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1406 if (raidPtr->Layout.map->faultsTolerated == 0) {
1407 /* This makes no sense on a RAID 0 */
1408 *(int *) data = 100;
1409 return(0);
1410 }
1411 if (raidPtr->copyback_in_progress == 1) {
1412 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1413 raidPtr->Layout.numStripe;
1414 } else {
1415 *(int *) data = 100;
1416 }
1417 return (0);
1418
1419 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1420 progressInfoPtr = (RF_ProgressInfo_t **) data;
1421 if (raidPtr->copyback_in_progress == 1) {
1422 progressInfo.total = raidPtr->Layout.numStripe;
1423 progressInfo.completed =
1424 raidPtr->copyback_stripes_done;
1425 progressInfo.remaining = progressInfo.total -
1426 progressInfo.completed;
1427 } else {
1428 progressInfo.remaining = 0;
1429 progressInfo.completed = 100;
1430 progressInfo.total = 100;
1431 }
1432 retcode = copyout(&progressInfo, *progressInfoPtr,
1433 sizeof(RF_ProgressInfo_t));
1434 return (retcode);
1435
1436 /* the sparetable daemon calls this to wait for the kernel to
1437 * need a spare table. this ioctl does not return until a
1438 * spare table is needed. XXX -- calling mpsleep here in the
1439 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1440 * -- I should either compute the spare table in the kernel,
1441 * or have a different -- XXX XXX -- interface (a different
1442 * character device) for delivering the table -- XXX */
1443 #if 0
1444 case RAIDFRAME_SPARET_WAIT:
1445 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1446 while (!rf_sparet_wait_queue)
1447 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1448 waitreq = rf_sparet_wait_queue;
1449 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1451
1452 /* structure assignment */
1453 *((RF_SparetWait_t *) data) = *waitreq;
1454
1455 RF_Free(waitreq, sizeof(*waitreq));
1456 return (0);
1457
1458 /* wakes up a process waiting on SPARET_WAIT and puts an error
1459 * code in it that will cause the dameon to exit */
1460 case RAIDFRAME_ABORT_SPARET_WAIT:
1461 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1462 waitreq->fcol = -1;
1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1464 waitreq->next = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = waitreq;
1466 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1467 wakeup(&rf_sparet_wait_queue);
1468 return (0);
1469
1470 /* used by the spare table daemon to deliver a spare table
1471 * into the kernel */
1472 case RAIDFRAME_SEND_SPARET:
1473
1474 /* install the spare table */
1475 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1476
1477 /* respond to the requestor. the return status of the spare
1478 * table installation is passed in the "fcol" field */
1479 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1480 waitreq->fcol = retcode;
1481 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1482 waitreq->next = rf_sparet_resp_queue;
1483 rf_sparet_resp_queue = waitreq;
1484 wakeup(&rf_sparet_resp_queue);
1485 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1486
1487 return (retcode);
1488 #endif
1489
1490 default:
1491 break; /* fall through to the os-specific code below */
1492
1493 }
1494
1495 if (!raidPtr->valid)
1496 return (EINVAL);
1497
1498 /*
1499 * Add support for "regular" device ioctls here.
1500 */
1501
1502 switch (cmd) {
1503 case DIOCGDINFO:
1504 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1505 break;
1506 #ifdef __HAVE_OLD_DISKLABEL
1507 case ODIOCGDINFO:
1508 newlabel = *(rs->sc_dkdev.dk_label);
1509 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1510 return ENOTTY;
1511 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1512 break;
1513 #endif
1514
1515 case DIOCGPART:
1516 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1517 ((struct partinfo *) data)->part =
1518 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1519 break;
1520
1521 case DIOCWDINFO:
1522 case DIOCSDINFO:
1523 #ifdef __HAVE_OLD_DISKLABEL
1524 case ODIOCWDINFO:
1525 case ODIOCSDINFO:
1526 #endif
1527 {
1528 struct disklabel *lp;
1529 #ifdef __HAVE_OLD_DISKLABEL
1530 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1531 memset(&newlabel, 0, sizeof newlabel);
1532 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1533 lp = &newlabel;
1534 } else
1535 #endif
1536 lp = (struct disklabel *)data;
1537
1538 if ((error = raidlock(rs)) != 0)
1539 return (error);
1540
1541 rs->sc_flags |= RAIDF_LABELLING;
1542
1543 error = setdisklabel(rs->sc_dkdev.dk_label,
1544 lp, 0, rs->sc_dkdev.dk_cpulabel);
1545 if (error == 0) {
1546 if (cmd == DIOCWDINFO
1547 #ifdef __HAVE_OLD_DISKLABEL
1548 || cmd == ODIOCWDINFO
1549 #endif
1550 )
1551 error = writedisklabel(RAIDLABELDEV(dev),
1552 raidstrategy, rs->sc_dkdev.dk_label,
1553 rs->sc_dkdev.dk_cpulabel);
1554 }
1555 rs->sc_flags &= ~RAIDF_LABELLING;
1556
1557 raidunlock(rs);
1558
1559 if (error)
1560 return (error);
1561 break;
1562 }
1563
1564 case DIOCWLABEL:
1565 if (*(int *) data != 0)
1566 rs->sc_flags |= RAIDF_WLABEL;
1567 else
1568 rs->sc_flags &= ~RAIDF_WLABEL;
1569 break;
1570
1571 case DIOCGDEFLABEL:
1572 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1573 break;
1574
1575 #ifdef __HAVE_OLD_DISKLABEL
1576 case ODIOCGDEFLABEL:
1577 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1578 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1579 return ENOTTY;
1580 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1581 break;
1582 #endif
1583
1584 default:
1585 retcode = ENOTTY;
1586 }
1587 return (retcode);
1588
1589 }
1590
1591
1592 /* raidinit -- complete the rest of the initialization for the
1593 RAIDframe device. */
1594
1595
1596 static void
1597 raidinit(RF_Raid_t *raidPtr)
1598 {
1599 struct raid_softc *rs;
1600 int unit;
1601
1602 unit = raidPtr->raidid;
1603
1604 rs = &raid_softc[unit];
1605
1606 /* XXX should check return code first... */
1607 rs->sc_flags |= RAIDF_INITED;
1608
1609 /* XXX doesn't check bounds. */
1610 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1611
1612 rs->sc_dkdev.dk_name = rs->sc_xname;
1613
1614 /* disk_attach actually creates space for the CPU disklabel, among
1615 * other things, so it's critical to call this *BEFORE* we try putzing
1616 * with disklabels. */
1617
1618 pseudo_disk_attach(&rs->sc_dkdev);
1619
1620 /* XXX There may be a weird interaction here between this, and
1621 * protectedSectors, as used in RAIDframe. */
1622
1623 rs->sc_size = raidPtr->totalSectors;
1624 }
1625 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* rf_GetSpareTableFromDaemon -- wake up the user-level sparetable
 * daemon and wait for it to deliver a spare table.
 *
 * The request is pushed onto rf_sparet_wait_queue and the daemon
 * (blocked in RAIDFRAME_SPARET_WAIT) is woken; we then sleep until a
 * response entry appears on rf_sparet_resp_queue and return its "fcol"
 * status code.
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 *
 * NOTE(review): the "mpsleep unlocks the mutex" comment below looks
 * stale -- tsleep() does not release rf_sparet_wait_mutex, so this
 * (#if'd-out) path would sleep with the mutex held; confirm before
 * ever re-enabling it.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Publish the request and poke the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Dequeue the daemon's response entry. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The install status is passed back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1659 #endif
1660
1661 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1662 * bp & passes it down.
1663 * any calls originating in the kernel must use non-blocking I/O
1664 * do some extra sanity checking to return "appropriate" error values for
1665 * certain conditions (to make some standard utilities work)
1666 *
1667 * Formerly known as: rf_DoAccessKernel
1668 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update, re-take it
		 * before touching numNewFailures again. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at every test of the
	 * loop condition and released while a buffer is processed. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do (mutex already dropped) */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* num_blocks: whole sectors; pb: 1 iff a partial sector
		 * trails the request. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run past the end of the array or
		 * whose address arithmetic wrapped around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop test */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Partial-sector transfers are not supported. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess rejected the request outright;
			 * fail the buffer here -- presumably no async
			 * completion follows in this case (TODO confirm
			 * against rf_DoAccess). */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1792
1793
1794
1795
1796 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1797
/*
 * rf_DispatchKernelIO -- issue one disk-queue request (read, write, or
 * NOP) to the underlying component through the buffer cache.  Always
 * returns 0; real completion status is delivered asynchronously via
 * KernelWakeupFunc().  Disk queue should be locked upon entry.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parentheses below are harmless
		 * (a parenthesized expression) but look like a leftover
		 * from a db1_printf-style macro. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* Fake an immediate completion for the NOP. */
		bp->b_flags = 0;
		bp->b_fspriv.bf_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in the buf for this component transfer; the req is
		 * stashed as the callback argument. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Hand the buf to the component's strategy routine. */
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* KernelWakeupFunc -- the callback function associated with an I/O
 * invoked from kernel code (wired up by InitBP()).  Runs at splbio().
 * Records trace timing, marks a component failed on I/O error (but only
 * if doing so would not leave the set completely broken), saves the
 * error status into the request, and queues the request for the raidio
 * thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The request was stashed in the buf's private field by the
	 * dispatch path. */
	req = bp->b_fspriv.bf_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1944
1945
1946
1947 /*
1948 * initialize a buf structure for doing an I/O in the kernel.
1949 */
1950 static void
1951 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1952 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1953 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1954 struct proc *b_proc)
1955 {
1956 /* bp->b_flags = B_PHYS | rw_flag; */
1957 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1958 bp->b_bcount = numSect << logBytesPerSector;
1959 bp->b_bufsize = bp->b_bcount;
1960 bp->b_error = 0;
1961 bp->b_dev = dev;
1962 bp->b_data = bf;
1963 bp->b_blkno = startSect;
1964 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1965 if (bp->b_bcount == 0) {
1966 panic("bp->b_bcount is zero in InitBP!!");
1967 }
1968 bp->b_proc = b_proc;
1969 bp->b_iodone = cbFunc;
1970 bp->b_fspriv.bf_private = cbArg;
1971 bp->b_vp = b_vp;
1972 if ((bp->b_flags & B_READ) == 0) {
1973 bp->b_vp->v_numoutput++;
1974 }
1975
1976 }
1977
1978 static void
1979 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1980 struct disklabel *lp)
1981 {
1982 memset(lp, 0, sizeof(*lp));
1983
1984 /* fabricate a label... */
1985 lp->d_secperunit = raidPtr->totalSectors;
1986 lp->d_secsize = raidPtr->bytesPerSector;
1987 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1988 lp->d_ntracks = 4 * raidPtr->numCol;
1989 lp->d_ncylinders = raidPtr->totalSectors /
1990 (lp->d_nsectors * lp->d_ntracks);
1991 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1992
1993 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1994 lp->d_type = DTYPE_RAID;
1995 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1996 lp->d_rpm = 3600;
1997 lp->d_interleave = 1;
1998 lp->d_flags = 0;
1999
2000 lp->d_partitions[RAW_PART].p_offset = 0;
2001 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2002 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2003 lp->d_npartitions = RAW_PART + 1;
2004
2005 lp->d_magic = DISKMAGIC;
2006 lp->d_magic2 = DISKMAGIC;
2007 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2008
2009 }
2010 /*
2011 * Read the disklabel from the raid device. If one is not present, fake one
2012 * up.
2013 */
2014 static void
2015 raidgetdisklabel(dev_t dev)
2016 {
2017 int unit = raidunit(dev);
2018 struct raid_softc *rs = &raid_softc[unit];
2019 const char *errstring;
2020 struct disklabel *lp = rs->sc_dkdev.dk_label;
2021 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2022 RF_Raid_t *raidPtr;
2023
2024 db1_printf(("Getting the disklabel...\n"));
2025
2026 memset(clp, 0, sizeof(*clp));
2027
2028 raidPtr = raidPtrs[unit];
2029
2030 raidgetdefaultlabel(raidPtr, rs, lp);
2031
2032 /*
2033 * Call the generic disklabel extraction routine.
2034 */
2035 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2036 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2037 if (errstring)
2038 raidmakedisklabel(rs);
2039 else {
2040 int i;
2041 struct partition *pp;
2042
2043 /*
2044 * Sanity check whether the found disklabel is valid.
2045 *
2046 * This is necessary since total size of the raid device
2047 * may vary when an interleave is changed even though exactly
2048 * same componets are used, and old disklabel may used
2049 * if that is found.
2050 */
2051 if (lp->d_secperunit != rs->sc_size)
2052 printf("raid%d: WARNING: %s: "
2053 "total sector size in disklabel (%d) != "
2054 "the size of raid (%ld)\n", unit, rs->sc_xname,
2055 lp->d_secperunit, (long) rs->sc_size);
2056 for (i = 0; i < lp->d_npartitions; i++) {
2057 pp = &lp->d_partitions[i];
2058 if (pp->p_offset + pp->p_size > rs->sc_size)
2059 printf("raid%d: WARNING: %s: end of partition `%c' "
2060 "exceeds the size of raid (%ld)\n",
2061 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2062 }
2063 }
2064
2065 }
2066 /*
2067 * Take care of things one might want to take care of in the event
2068 * that a disklabel isn't present.
2069 */
2070 static void
2071 raidmakedisklabel(struct raid_softc *rs)
2072 {
2073 struct disklabel *lp = rs->sc_dkdev.dk_label;
2074 db1_printf(("Making a label..\n"));
2075
2076 /*
2077 * For historical reasons, if there's no disklabel present
2078 * the raw partition must be marked FS_BSDFFS.
2079 */
2080
2081 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2082
2083 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2084
2085 lp->d_checksum = dkcksum(lp);
2086 }
2087 /*
2088 * Lookup the provided name in the filesystem. If the file exists,
2089 * is a valid block device, and isn't being used by anyone else,
2090 * set *vpp to the file's vnode.
2091 * You'll find the original of this in ccd.c
2092 */
2093 int
2094 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2095 {
2096 struct nameidata nd;
2097 struct vnode *vp;
2098 struct proc *p;
2099 struct vattr va;
2100 int error;
2101
2102 p = l ? l->l_proc : NULL;
2103 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2104 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2105 return (error);
2106 }
2107 vp = nd.ni_vp;
2108 if (vp->v_usecount > 1) {
2109 VOP_UNLOCK(vp, 0);
2110 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2111 return (EBUSY);
2112 }
2113 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) {
2114 VOP_UNLOCK(vp, 0);
2115 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2116 return (error);
2117 }
2118 /* XXX: eventually we should handle VREG, too. */
2119 if (va.va_type != VBLK) {
2120 VOP_UNLOCK(vp, 0);
2121 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
2122 return (ENOTBLK);
2123 }
2124 VOP_UNLOCK(vp, 0);
2125 *vpp = vp;
2126 return (0);
2127 }
2128 /*
2129 * Wait interruptibly for an exclusive lock.
2130 *
2131 * XXX
2132 * Several drivers do this; it should be abstracted and made MP-safe.
2133 * (Hmm... where have we seen this warning before :-> GO )
2134 */
2135 static int
2136 raidlock(struct raid_softc *rs)
2137 {
2138 int error;
2139
2140 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2141 rs->sc_flags |= RAIDF_WANTED;
2142 if ((error =
2143 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2144 return (error);
2145 }
2146 rs->sc_flags |= RAIDF_LOCKED;
2147 return (0);
2148 }
2149 /*
2150 * Unlock and wake up any waiters.
2151 */
2152 static void
2153 raidunlock(struct raid_softc *rs)
2154 {
2155
2156 rs->sc_flags &= ~RAIDF_LOCKED;
2157 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2158 rs->sc_flags &= ~RAIDF_WANTED;
2159 wakeup(rs);
2160 }
2161 }
2162
2163
2164 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2165 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2166
2167 int
2168 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2169 {
2170 RF_ComponentLabel_t clabel;
2171 raidread_component_label(dev, b_vp, &clabel);
2172 clabel.mod_counter = mod_counter;
2173 clabel.clean = RF_RAID_CLEAN;
2174 raidwrite_component_label(dev, b_vp, &clabel);
2175 return(0);
2176 }
2177
2178
2179 int
2180 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2181 {
2182 RF_ComponentLabel_t clabel;
2183 raidread_component_label(dev, b_vp, &clabel);
2184 clabel.mod_counter = mod_counter;
2185 clabel.clean = RF_RAID_DIRTY;
2186 raidwrite_component_label(dev, b_vp, &clabel);
2187 return(0);
2188 }
2189
2190 /* ARGSUSED */
2191 int
2192 raidread_component_label(dev_t dev, struct vnode *b_vp,
2193 RF_ComponentLabel_t *clabel)
2194 {
2195 struct buf *bp;
2196 const struct bdevsw *bdev;
2197 int error;
2198
2199 /* XXX should probably ensure that we don't try to do this if
2200 someone has changed rf_protected_sectors. */
2201
2202 if (b_vp == NULL) {
2203 /* For whatever reason, this component is not valid.
2204 Don't try to read a component label from it. */
2205 return(EINVAL);
2206 }
2207
2208 /* get a block of the appropriate size... */
2209 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2210 bp->b_dev = dev;
2211
2212 /* get our ducks in a row for the read */
2213 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2214 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2215 bp->b_flags |= B_READ;
2216 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2217
2218 bdev = bdevsw_lookup(bp->b_dev);
2219 if (bdev == NULL)
2220 return (ENXIO);
2221 (*bdev->d_strategy)(bp);
2222
2223 error = biowait(bp);
2224
2225 if (!error) {
2226 memcpy(clabel, bp->b_data,
2227 sizeof(RF_ComponentLabel_t));
2228 }
2229
2230 brelse(bp);
2231 return(error);
2232 }
2233 /* ARGSUSED */
2234 int
2235 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2236 RF_ComponentLabel_t *clabel)
2237 {
2238 struct buf *bp;
2239 const struct bdevsw *bdev;
2240 int error;
2241
2242 /* get a block of the appropriate size... */
2243 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2244 bp->b_dev = dev;
2245
2246 /* get our ducks in a row for the write */
2247 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2248 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2249 bp->b_flags |= B_WRITE;
2250 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2251
2252 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2253
2254 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2255
2256 bdev = bdevsw_lookup(bp->b_dev);
2257 if (bdev == NULL)
2258 return (ENXIO);
2259 (*bdev->d_strategy)(bp);
2260 error = biowait(bp);
2261 brelse(bp);
2262 if (error) {
2263 #if 1
2264 printf("Failed to write RAID component info!\n");
2265 #endif
2266 }
2267
2268 return(error);
2269 }
2270
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component (and every in-use spare) dirty, pushing
 * the new mod_counter out to each label via raidmarkdirty().
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare currently stands
			   in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): scol is not reset between spares;
			   if no column matched, the value left over from a
			   previous iteration (or the initial -1) is written
			   into the label below -- confirm an
			   rf_ds_used_spare disk always has a sparing
			   column. */

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Rebuild the label from current raid state;
			   then record the column being spared. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2341
2342
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with the current raid state and a freshly bumped mod_counter.
 * If `final' is RF_FINAL_COMPONENT_UPDATE and parity is known good,
 * also mark each label clean (via raidmarkclean) -- i.e. this is the
 * shutdown-time "everything is consistent" stamp.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Rebuild the label from current raid state,
			   then record where this spare now lives. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2433
/*
 * Close one component's vnode.  Auto-configured components were opened
 * with VOP_OPEN (see rf_find_raid_components), so they are closed with
 * vn_lock/VOP_CLOSE/vput; manually configured components came from
 * vn_open (see raidlookup) and are released with vn_close using the
 * engine thread's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
	struct proc *p;
	struct lwp *l;

	/* NOTE(review): assumes the engine thread (and its first lwp)
	   is still alive at this point -- confirm against shutdown
	   ordering. */
	p = raidPtr->engine_thread;
	l = LIST_FIRST(&p->p_lwps);

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l);
		}
	}
}
2454
2455
2456 void
2457 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2458 {
2459 int r,c;
2460 struct vnode *vp;
2461 int acd;
2462
2463
2464 /* We take this opportunity to close the vnodes like we should.. */
2465
2466 for (c = 0; c < raidPtr->numCol; c++) {
2467 vp = raidPtr->raid_cinfo[c].ci_vp;
2468 acd = raidPtr->Disks[c].auto_configured;
2469 rf_close_component(raidPtr, vp, acd);
2470 raidPtr->raid_cinfo[c].ci_vp = NULL;
2471 raidPtr->Disks[c].auto_configured = 0;
2472 }
2473
2474 for (r = 0; r < raidPtr->numSpare; r++) {
2475 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2476 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2477 rf_close_component(raidPtr, vp, acd);
2478 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2479 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2480 }
2481 }
2482
2483
/*
 * Kernel-thread body: fail the component named in *req and (if
 * RF_FDFLAGS_RECON is set) reconstruct it onto a spare.  Frees the
 * request and exits the thread when done; never returns.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* Block disk interrupts while the reconstruction runs. */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* The request was handed to us to free. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2505
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * in-core parity_good flag is set to RF_RAID_CLEAN (the on-disk clean
 * bits are written later at shutdown).  Wakes any thread sleeping on
 * parity_rewrite_in_progress, then exits; never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2535
2536
/*
 * Kernel-thread body: copy reconstructed data back from the spare to
 * the (replaced) original component, then exit; never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	/* Block disk interrupts for the duration of the copyback. */
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2551
2552
/*
 * Kernel-thread body: reconstruct the component in req->col in place
 * (onto the same disk), free the request, and exit; never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* The request was handed to us to free. */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2570
2571 RF_AutoConfig_t *
2572 rf_find_raid_components()
2573 {
2574 struct vnode *vp;
2575 struct disklabel label;
2576 struct device *dv;
2577 dev_t dev;
2578 int bmajor;
2579 int error;
2580 int i;
2581 int good_one;
2582 RF_ComponentLabel_t *clabel;
2583 RF_AutoConfig_t *ac_list;
2584 RF_AutoConfig_t *ac;
2585
2586
2587 /* initialize the AutoConfig list */
2588 ac_list = NULL;
2589
2590 /* we begin by trolling through *all* the devices on the system */
2591
2592 for (dv = alldevs.tqh_first; dv != NULL;
2593 dv = dv->dv_list.tqe_next) {
2594
2595 /* we are only interested in disks... */
2596 if (dv->dv_class != DV_DISK)
2597 continue;
2598
2599 /* we don't care about floppies... */
2600 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2601 continue;
2602 }
2603
2604 /* we don't care about CD's... */
2605 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2606 continue;
2607 }
2608
2609 /* hdfd is the Atari/Hades floppy driver */
2610 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2611 continue;
2612 }
2613 /* fdisa is the Atari/Milan floppy driver */
2614 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2615 continue;
2616 }
2617
2618 /* need to find the device_name_to_block_device_major stuff */
2619 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2620
2621 /* get a vnode for the raw partition of this disk */
2622
2623 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2624 if (bdevvp(dev, &vp))
2625 panic("RAID can't alloc vnode");
2626
2627 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2628
2629 if (error) {
2630 /* "Who cares." Continue looking
2631 for something that exists*/
2632 vput(vp);
2633 continue;
2634 }
2635
2636 /* Ok, the disk exists. Go get the disklabel. */
2637 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2638 if (error) {
2639 /*
2640 * XXX can't happen - open() would
2641 * have errored out (or faked up one)
2642 */
2643 if (error != ENOTTY)
2644 printf("RAIDframe: can't get label for dev "
2645 "%s (%d)\n", dv->dv_xname, error);
2646 }
2647
2648 /* don't need this any more. We'll allocate it again
2649 a little later if we really do... */
2650 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2651 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2652 vput(vp);
2653
2654 if (error)
2655 continue;
2656
2657 for (i=0; i < label.d_npartitions; i++) {
2658 /* We only support partitions marked as RAID */
2659 if (label.d_partitions[i].p_fstype != FS_RAID)
2660 continue;
2661
2662 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2663 if (bdevvp(dev, &vp))
2664 panic("RAID can't alloc vnode");
2665
2666 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2667 if (error) {
2668 /* Whatever... */
2669 vput(vp);
2670 continue;
2671 }
2672
2673 good_one = 0;
2674
2675 clabel = (RF_ComponentLabel_t *)
2676 malloc(sizeof(RF_ComponentLabel_t),
2677 M_RAIDFRAME, M_NOWAIT);
2678 if (clabel == NULL) {
2679 /* XXX CLEANUP HERE */
2680 printf("RAID auto config: out of memory!\n");
2681 return(NULL); /* XXX probably should panic? */
2682 }
2683
2684 if (!raidread_component_label(dev, vp, clabel)) {
2685 /* Got the label. Does it look reasonable? */
2686 if (rf_reasonable_label(clabel) &&
2687 (clabel->partitionSize <=
2688 label.d_partitions[i].p_size)) {
2689 #if DEBUG
2690 printf("Component on: %s%c: %d\n",
2691 dv->dv_xname, 'a'+i,
2692 label.d_partitions[i].p_size);
2693 rf_print_component_label(clabel);
2694 #endif
2695 /* if it's reasonable, add it,
2696 else ignore it. */
2697 ac = (RF_AutoConfig_t *)
2698 malloc(sizeof(RF_AutoConfig_t),
2699 M_RAIDFRAME,
2700 M_NOWAIT);
2701 if (ac == NULL) {
2702 /* XXX should panic?? */
2703 return(NULL);
2704 }
2705
2706 snprintf(ac->devname,
2707 sizeof(ac->devname), "%s%c",
2708 dv->dv_xname, 'a'+i);
2709 ac->dev = dev;
2710 ac->vp = vp;
2711 ac->clabel = clabel;
2712 ac->next = ac_list;
2713 ac_list = ac;
2714 good_one = 1;
2715 }
2716 }
2717 if (!good_one) {
2718 /* cleanup */
2719 free(clabel, M_RAIDFRAME);
2720 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2721 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2722 vput(vp);
2723 }
2724 }
2725 }
2726 return(ac_list);
2727 }
2728
2729 static int
2730 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2731 {
2732
2733 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2734 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2735 ((clabel->clean == RF_RAID_CLEAN) ||
2736 (clabel->clean == RF_RAID_DIRTY)) &&
2737 clabel->row >=0 &&
2738 clabel->column >= 0 &&
2739 clabel->num_rows > 0 &&
2740 clabel->num_columns > 0 &&
2741 clabel->row < clabel->num_rows &&
2742 clabel->column < clabel->num_columns &&
2743 clabel->blockSize > 0 &&
2744 clabel->numBlocks > 0) {
2745 /* label looks reasonable enough... */
2746 return(1);
2747 }
2748 return(0);
2749 }
2750
2751
#if DEBUG
/*
 * Debug helper: dump every interesting field of a component label to
 * the console.  Compiled only when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2779
2780 RF_ConfigSet_t *
2781 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2782 {
2783 RF_AutoConfig_t *ac;
2784 RF_ConfigSet_t *config_sets;
2785 RF_ConfigSet_t *cset;
2786 RF_AutoConfig_t *ac_next;
2787
2788
2789 config_sets = NULL;
2790
2791 /* Go through the AutoConfig list, and figure out which components
2792 belong to what sets. */
2793 ac = ac_list;
2794 while(ac!=NULL) {
2795 /* we're going to putz with ac->next, so save it here
2796 for use at the end of the loop */
2797 ac_next = ac->next;
2798
2799 if (config_sets == NULL) {
2800 /* will need at least this one... */
2801 config_sets = (RF_ConfigSet_t *)
2802 malloc(sizeof(RF_ConfigSet_t),
2803 M_RAIDFRAME, M_NOWAIT);
2804 if (config_sets == NULL) {
2805 panic("rf_create_auto_sets: No memory!");
2806 }
2807 /* this one is easy :) */
2808 config_sets->ac = ac;
2809 config_sets->next = NULL;
2810 config_sets->rootable = 0;
2811 ac->next = NULL;
2812 } else {
2813 /* which set does this component fit into? */
2814 cset = config_sets;
2815 while(cset!=NULL) {
2816 if (rf_does_it_fit(cset, ac)) {
2817 /* looks like it matches... */
2818 ac->next = cset->ac;
2819 cset->ac = ac;
2820 break;
2821 }
2822 cset = cset->next;
2823 }
2824 if (cset==NULL) {
2825 /* didn't find a match above... new set..*/
2826 cset = (RF_ConfigSet_t *)
2827 malloc(sizeof(RF_ConfigSet_t),
2828 M_RAIDFRAME, M_NOWAIT);
2829 if (cset == NULL) {
2830 panic("rf_create_auto_sets: No memory!");
2831 }
2832 cset->ac = ac;
2833 ac->next = NULL;
2834 cset->next = config_sets;
2835 cset->rootable = 0;
2836 config_sets = cset;
2837 }
2838 }
2839 ac = ac_next;
2840 }
2841
2842
2843 return(config_sets);
2844 }
2845
2846 static int
2847 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2848 {
2849 RF_ComponentLabel_t *clabel1, *clabel2;
2850
2851 /* If this one matches the *first* one in the set, that's good
2852 enough, since the other members of the set would have been
2853 through here too... */
2854 /* note that we are not checking partitionSize here..
2855
2856 Note that we are also not checking the mod_counters here.
2857 If everything else matches execpt the mod_counter, that's
2858 good enough for this test. We will deal with the mod_counters
2859 a little later in the autoconfiguration process.
2860
2861 (clabel1->mod_counter == clabel2->mod_counter) &&
2862
2863 The reason we don't check for this is that failed disks
2864 will have lower modification counts. If those disks are
2865 not added to the set they used to belong to, then they will
2866 form their own set, which may result in 2 different sets,
2867 for example, competing to be configured at raid0, and
2868 perhaps competing to be the root filesystem set. If the
2869 wrong ones get configured, or both attempt to become /,
2870 weird behaviour and or serious lossage will occur. Thus we
2871 need to bring them into the fold here, and kick them out at
2872 a later point.
2873
2874 */
2875
2876 clabel1 = cset->ac->clabel;
2877 clabel2 = ac->clabel;
2878 if ((clabel1->version == clabel2->version) &&
2879 (clabel1->serial_number == clabel2->serial_number) &&
2880 (clabel1->num_rows == clabel2->num_rows) &&
2881 (clabel1->num_columns == clabel2->num_columns) &&
2882 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2883 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2884 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2885 (clabel1->parityConfig == clabel2->parityConfig) &&
2886 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2887 (clabel1->blockSize == clabel2->blockSize) &&
2888 (clabel1->numBlocks == clabel2->numBlocks) &&
2889 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2890 (clabel1->root_partition == clabel2->root_partition) &&
2891 (clabel1->last_unit == clabel2->last_unit) &&
2892 (clabel1->config_order == clabel2->config_order)) {
2893 /* if it get's here, it almost *has* to be a match */
2894 } else {
2895 /* it's not consistent with somebody in the set..
2896 punt */
2897 return(0);
2898 }
2899 /* all was fine.. it must fit... */
2900 return(1);
2901 }
2902
/*
 * Decide whether a configuration set has enough live components to be
 * configured.  The authoritative mod_counter is taken to be the
 * highest one present in the set; components with stale counters do
 * not count as present.  RAID 1 gets special pairwise accounting
 * (losing both members of an even/odd pair is fatal); other levels
 * simply count missing components against the level's tolerance.
 * Returns 1 if the set is configurable, 0 if not.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a component for column c with the current
		   mod_counter. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   exactly one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3005
/*
 * Build an RF_Config_t for rf_Configure() from the component labels of
 * an autoconfig chain.  Geometry and queueing parameters come from the
 * first component's label; device names are filled in per column from
 * each list member.  Note the side effect: clabel->num_rows is forced
 * to 1 here.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Fill in a device name for each column present in the list. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3040
3041 int
3042 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3043 {
3044 RF_ComponentLabel_t clabel;
3045 struct vnode *vp;
3046 dev_t dev;
3047 int column;
3048 int sparecol;
3049
3050 raidPtr->autoconfigure = new_value;
3051
3052 for(column=0; column<raidPtr->numCol; column++) {
3053 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3054 dev = raidPtr->Disks[column].dev;
3055 vp = raidPtr->raid_cinfo[column].ci_vp;
3056 raidread_component_label(dev, vp, &clabel);
3057 clabel.autoconfigure = new_value;
3058 raidwrite_component_label(dev, vp, &clabel);
3059 }
3060 }
3061 for(column = 0; column < raidPtr->numSpare ; column++) {
3062 sparecol = raidPtr->numCol + column;
3063 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3064 dev = raidPtr->Disks[sparecol].dev;
3065 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3066 raidread_component_label(dev, vp, &clabel);
3067 clabel.autoconfigure = new_value;
3068 raidwrite_component_label(dev, vp, &clabel);
3069 }
3070 }
3071 return(new_value);
3072 }
3073
3074 int
3075 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3076 {
3077 RF_ComponentLabel_t clabel;
3078 struct vnode *vp;
3079 dev_t dev;
3080 int column;
3081 int sparecol;
3082
3083 raidPtr->root_partition = new_value;
3084 for(column=0; column<raidPtr->numCol; column++) {
3085 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3086 dev = raidPtr->Disks[column].dev;
3087 vp = raidPtr->raid_cinfo[column].ci_vp;
3088 raidread_component_label(dev, vp, &clabel);
3089 clabel.root_partition = new_value;
3090 raidwrite_component_label(dev, vp, &clabel);
3091 }
3092 }
3093 for(column = 0; column < raidPtr->numSpare ; column++) {
3094 sparecol = raidPtr->numCol + column;
3095 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3096 dev = raidPtr->Disks[sparecol].dev;
3097 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3098 raidread_component_label(dev, vp, &clabel);
3099 clabel.root_partition = new_value;
3100 raidwrite_component_label(dev, vp, &clabel);
3101 }
3102 }
3103 return(new_value);
3104 }
3105
3106 void
3107 rf_release_all_vps(RF_ConfigSet_t *cset)
3108 {
3109 RF_AutoConfig_t *ac;
3110
3111 ac = cset->ac;
3112 while(ac!=NULL) {
3113 /* Close the vp, and give it back */
3114 if (ac->vp) {
3115 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3116 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3117 vput(ac->vp);
3118 ac->vp = NULL;
3119 }
3120 ac = ac->next;
3121 }
3122 }
3123
3124
3125 void
3126 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3127 {
3128 RF_AutoConfig_t *ac;
3129 RF_AutoConfig_t *next_ac;
3130
3131 ac = cset->ac;
3132 while(ac!=NULL) {
3133 next_ac = ac->next;
3134 /* nuke the label */
3135 free(ac->clabel, M_RAIDFRAME);
3136 /* cleanup the config structure */
3137 free(ac, M_RAIDFRAME);
3138 /* "next.." */
3139 ac = next_ac;
3140 }
3141 /* and, finally, nuke the config set */
3142 free(cset, M_RAIDFRAME);
3143 }
3144
3145
/*
 * Populate *clabel from the current in-core state of the raid set:
 * version, serial/mod counters, geometry, layout parameters, and the
 * autoconfig/root/unit bookkeeping.  The label is marked DIRTY and
 * rf_ds_optimal; callers adjust status/column afterwards as needed.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3173
3174 int
3175 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3176 {
3177 RF_Raid_t *raidPtr;
3178 RF_Config_t *config;
3179 int raidID;
3180 int retcode;
3181
3182 #if DEBUG
3183 printf("RAID autoconfigure\n");
3184 #endif
3185
3186 retcode = 0;
3187 *unit = -1;
3188
3189 /* 1. Create a config structure */
3190
3191 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3192 M_RAIDFRAME,
3193 M_NOWAIT);
3194 if (config==NULL) {
3195 printf("Out of mem!?!?\n");
3196 /* XXX do something more intelligent here. */
3197 return(1);
3198 }
3199
3200 memset(config, 0, sizeof(RF_Config_t));
3201
3202 /*
3203 2. Figure out what RAID ID this one is supposed to live at
3204 See if we can get the same RAID dev that it was configured
3205 on last time..
3206 */
3207
3208 raidID = cset->ac->clabel->last_unit;
3209 if ((raidID < 0) || (raidID >= numraid)) {
3210 /* let's not wander off into lala land. */
3211 raidID = numraid - 1;
3212 }
3213 if (raidPtrs[raidID]->valid != 0) {
3214
3215 /*
3216 Nope... Go looking for an alternative...
3217 Start high so we don't immediately use raid0 if that's
3218 not taken.
3219 */
3220
3221 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3222 if (raidPtrs[raidID]->valid == 0) {
3223 /* can use this one! */
3224 break;
3225 }
3226 }
3227 }
3228
3229 if (raidID < 0) {
3230 /* punt... */
3231 printf("Unable to auto configure this set!\n");
3232 printf("(Out of RAID devs!)\n");
3233 return(1);
3234 }
3235
3236 #if DEBUG
3237 printf("Configuring raid%d:\n",raidID);
3238 #endif
3239
3240 raidPtr = raidPtrs[raidID];
3241
3242 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3243 raidPtr->raidid = raidID;
3244 raidPtr->openings = RAIDOUTSTANDING;
3245
3246 /* 3. Build the configuration structure */
3247 rf_create_configuration(cset->ac, config, raidPtr);
3248
3249 /* 4. Do the configuration */
3250 retcode = rf_Configure(raidPtr, config, cset->ac);
3251
3252 if (retcode == 0) {
3253
3254 raidinit(raidPtrs[raidID]);
3255
3256 rf_markalldirty(raidPtrs[raidID]);
3257 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3258 if (cset->ac->clabel->root_partition==1) {
3259 /* everything configured just fine. Make a note
3260 that this set is eligible to be root. */
3261 cset->rootable = 1;
3262 /* XXX do this here? */
3263 raidPtrs[raidID]->root_partition = 1;
3264 }
3265 }
3266
3267 /* 5. Cleanup */
3268 free(config, M_RAIDFRAME);
3269
3270 *unit = raidID;
3271 return(retcode);
3272 }
3273
/*
 * Account the completion of the I/O described by `desc' against the
 * raid device's disk statistics (bytes actually transferred and
 * read/write direction).
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}
3283
/*
 * Initialize a RAIDframe memory pool: pre-allocate xmin items, keep at
 * least xmin on the free list, and cap the free list at xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3293
3294 /*
3295 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3296 * if there is IO pending and if that IO could possibly be done for a
3297 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3298 * otherwise.
3299 *
3300 */
3301
3302 int
3303 rf_buf_queue_check(int raidid)
3304 {
3305 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3306 raidPtrs[raidid]->openings > 0) {
3307 /* there is work to do */
3308 return 0;
3309 }
3310 /* default is nothing to do */
3311 return 1;
3312 }
3313