rf_netbsdkintf.c revision 1.168 1 /* $NetBSD: rf_netbsdkintf.c,v 1.168 2003/12/29 04:56:26 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.168 2003/12/29 04:56:26 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
189 #ifdef DEBUG
190 int rf_kdebug_level = 0;
191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
192 #else /* DEBUG */
193 #define db1_printf(a) { }
194 #endif /* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * b_proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /*
238 * Pilfered from ccd.c
239 */
240
/*
 * Per-component I/O wrapper (pattern pilfered from ccd.c): pairs the
 * buf issued to an underlying component with the original request it
 * services.  Allocated from raidframe_cbufpool.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
246
247 /* component buffer pool */
248 struct pool raidframe_cbufpool;
249
250 /* XXX Not sure if the following should be replacing the raidPtrs above,
251 or if it should be used in conjunction with that...
252 */
253
/*
 * Per-unit software state for one RAID pseudo-device.  One entry per
 * unit lives in the raid_softc[] array allocated in raidattach().
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_*, below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
262 /* sc_flags */
263 #define RAIDF_INITED 0x01 /* unit has been initialized */
264 #define RAIDF_WLABEL 0x02 /* label area is writable */
265 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
266 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
267 #define RAIDF_LOCKED 0x80 /* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
272 /*
273 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
274 * Be aware that large numbers can allow the driver to consume a lot of
275 * kernel memory, especially on writes, and in degraded mode reads.
276 *
277 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
278 * a single 64K write will typically require 64K for the old data,
279 * 64K for the old parity, and 64K for the new parity, for a total
280 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
282 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
283 *
284 * Now in degraded mode, for example, a 64K read on the above setup may
285 * require data reconstruction, which will require *all* of the 4 remaining
286 * disks to participate -- 4 * 32K/disk == 128K again.
287 */
288
289 #ifndef RAIDOUTSTANDING
290 #define RAIDOUTSTANDING 6
291 #endif
292
293 #define RAIDLABELDEV(dev) \
294 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
295
296 /* declared here, and made public, for the benefit of KVM stuff.. */
297 struct raid_softc *raid_softc;
298
299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
300 struct disklabel *);
301 static void raidgetdisklabel(dev_t);
302 static void raidmakedisklabel(struct raid_softc *);
303
304 static int raidlock(struct raid_softc *);
305 static void raidunlock(struct raid_softc *);
306
307 static void rf_markalldirty(RF_Raid_t *);
308
309 struct device *raidrootdev;
310
311 void rf_ReconThread(struct rf_recon_req *);
312 /* XXX what I want is: */
313 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
315 void rf_CopybackThread(RF_Raid_t *raidPtr);
316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
317 int rf_autoconfig(struct device *self);
318 void rf_buildroothack(RF_ConfigSet_t *);
319
320 RF_AutoConfig_t *rf_find_raid_components(void);
321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
323 static int rf_reasonable_label(RF_ComponentLabel_t *);
324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
325 int rf_set_autoconfig(RF_Raid_t *, int);
326 int rf_set_rootpartition(RF_Raid_t *, int);
327 void rf_release_all_vps(RF_ConfigSet_t *);
328 void rf_cleanup_config_set(RF_ConfigSet_t *);
329 int rf_have_enough_components(RF_ConfigSet_t *);
330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
331
332 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
333 allow autoconfig to take place.
334 Note that this is overridden by having
335 RAID_AUTOCONFIG as an option in the
336 kernel config file. */
337
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units ("num") configured into the kernel.  Sets
 * up all global driver state: the RAID descriptor array (raidPtrs),
 * the component buffer pool, the spare-table wait queues, the per-unit
 * softc array, the fake root devices used when booting from a RAID
 * set, and finally registers the autoconfiguration finalizer.
 */
void
raidattach(num)
	int num;
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/* Fake struct devices, one per unit, so a RAID set can be
	   nominated as the boot (root) device in rf_buildroothack(). */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink the visible unit count so the rest of
			   the driver never touches the unallocated tail. */
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
431
432 int
433 rf_autoconfig(struct device *self)
434 {
435 RF_AutoConfig_t *ac_list;
436 RF_ConfigSet_t *config_sets;
437
438 if (raidautoconfig == 0)
439 return (0);
440
441 /* XXX This code can only be run once. */
442 raidautoconfig = 0;
443
444 /* 1. locate all RAID components on the system */
445 #ifdef DEBUG
446 printf("Searching for RAID components...\n");
447 #endif
448 ac_list = rf_find_raid_components();
449
450 /* 2. Sort them into their respective sets. */
451 config_sets = rf_create_auto_sets(ac_list);
452
453 /*
454 * 3. Evaluate each set andconfigure the valid ones.
455 * This gets done in rf_buildroothack().
456 */
457 rf_buildroothack(config_sets);
458
459 return (1);
460 }
461
462 void
463 rf_buildroothack(RF_ConfigSet_t *config_sets)
464 {
465 RF_ConfigSet_t *cset;
466 RF_ConfigSet_t *next_cset;
467 int retcode;
468 int raidID;
469 int rootID;
470 int num_root;
471
472 rootID = 0;
473 num_root = 0;
474 cset = config_sets;
475 while(cset != NULL ) {
476 next_cset = cset->next;
477 if (rf_have_enough_components(cset) &&
478 cset->ac->clabel->autoconfigure==1) {
479 retcode = rf_auto_config_set(cset,&raidID);
480 if (!retcode) {
481 if (cset->rootable) {
482 rootID = raidID;
483 num_root++;
484 }
485 } else {
486 /* The autoconfig didn't work :( */
487 #if DEBUG
488 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
489 #endif
490 rf_release_all_vps(cset);
491 }
492 } else {
493 /* we're not autoconfiguring this set...
494 release the associated resources */
495 rf_release_all_vps(cset);
496 }
497 /* cleanup */
498 rf_cleanup_config_set(cset);
499 cset = next_cset;
500 }
501
502 /* we found something bootable... */
503
504 if (num_root == 1) {
505 booted_device = &raidrootdev[rootID];
506 } else if (num_root > 1) {
507 /* we can't guess.. require the user to answer... */
508 boothowto |= RB_ASKNAME;
509 }
510 }
511
512
513 int
514 raidsize(dev)
515 dev_t dev;
516 {
517 struct raid_softc *rs;
518 struct disklabel *lp;
519 int part, unit, omask, size;
520
521 unit = raidunit(dev);
522 if (unit >= numraid)
523 return (-1);
524 rs = &raid_softc[unit];
525
526 if ((rs->sc_flags & RAIDF_INITED) == 0)
527 return (-1);
528
529 part = DISKPART(dev);
530 omask = rs->sc_dkdev.dk_openmask & (1 << part);
531 lp = rs->sc_dkdev.dk_label;
532
533 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
534 return (-1);
535
536 if (lp->d_partitions[part].p_fstype != FS_SWAP)
537 size = -1;
538 else
539 size = lp->d_partitions[part].p_size *
540 (lp->d_secsize / DEV_BSIZE);
541
542 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
543 return (-1);
544
545 return (size);
546
547 }
548
549 int
550 raiddump(dev, blkno, va, size)
551 dev_t dev;
552 daddr_t blkno;
553 caddr_t va;
554 size_t size;
555 {
556 /* Not implemented. */
557 return ENXIO;
558 }
559 /* ARGSUSED */
/*
 * raidopen: open entry point for both the block and character devices.
 * Validates the unit and partition, re-reads the disklabel on the
 * first open of an initialized set, records the open in the per-mode
 * open masks, and marks all components dirty so an unclean shutdown
 * can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open state here, so zero means
	   this is the very first opener of the unit. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
632 /* ARGSUSED */
633 int
634 raidclose(dev, flags, fmt, p)
635 dev_t dev;
636 int flags, fmt;
637 struct proc *p;
638 {
639 int unit = raidunit(dev);
640 struct raid_softc *rs;
641 int error = 0;
642 int part;
643
644 if (unit >= numraid)
645 return (ENXIO);
646 rs = &raid_softc[unit];
647
648 if ((error = raidlock(rs)) != 0)
649 return (error);
650
651 part = DISKPART(dev);
652
653 /* ...that much closer to allowing unconfiguration... */
654 switch (fmt) {
655 case S_IFCHR:
656 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
657 break;
658
659 case S_IFBLK:
660 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
661 break;
662 }
663 rs->sc_dkdev.dk_openmask =
664 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
665
666 if ((rs->sc_dkdev.dk_openmask == 0) &&
667 ((rs->sc_flags & RAIDF_INITED) != 0)) {
668 /* Last one... device is not unconfigured yet.
669 Device shutdown has taken care of setting the
670 clean bits if RAIDF_INITED is not set
671 mark things as clean... */
672
673 rf_update_component_labels(raidPtrs[unit],
674 RF_FINAL_COMPONENT_UPDATE);
675 if (doing_shutdown) {
676 /* last one, and we're going down, so
677 lights out for this RAID set too. */
678 error = rf_Shutdown(raidPtrs[unit]);
679
680 /* It's no longer initialized... */
681 rs->sc_flags &= ~RAIDF_INITED;
682
683 /* Detach the disk. */
684 disk_detach(&rs->sc_dkdev);
685 }
686 }
687
688 raidunlock(rs);
689 return (0);
690
691 }
692
693 void
694 raidstrategy(bp)
695 struct buf *bp;
696 {
697 int s;
698
699 unsigned int raidID = raidunit(bp->b_dev);
700 RF_Raid_t *raidPtr;
701 struct raid_softc *rs = &raid_softc[raidID];
702 int wlabel;
703
704 if ((rs->sc_flags & RAIDF_INITED) ==0) {
705 bp->b_error = ENXIO;
706 bp->b_flags |= B_ERROR;
707 bp->b_resid = bp->b_bcount;
708 biodone(bp);
709 return;
710 }
711 if (raidID >= numraid || !raidPtrs[raidID]) {
712 bp->b_error = ENODEV;
713 bp->b_flags |= B_ERROR;
714 bp->b_resid = bp->b_bcount;
715 biodone(bp);
716 return;
717 }
718 raidPtr = raidPtrs[raidID];
719 if (!raidPtr->valid) {
720 bp->b_error = ENODEV;
721 bp->b_flags |= B_ERROR;
722 bp->b_resid = bp->b_bcount;
723 biodone(bp);
724 return;
725 }
726 if (bp->b_bcount == 0) {
727 db1_printf(("b_bcount is zero..\n"));
728 biodone(bp);
729 return;
730 }
731
732 /*
733 * Do bounds checking and adjust transfer. If there's an
734 * error, the bounds check will flag that for us.
735 */
736
737 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
738 if (DISKPART(bp->b_dev) != RAW_PART)
739 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
740 db1_printf(("Bounds check failed!!:%d %d\n",
741 (int) bp->b_blkno, (int) wlabel));
742 biodone(bp);
743 return;
744 }
745 s = splbio();
746
747 bp->b_resid = 0;
748
749 /* stuff it onto our queue */
750 BUFQ_PUT(&rs->buf_queue, bp);
751
752 raidstart(raidPtrs[raidID]);
753
754 splx(s);
755 }
756 /* ARGSUSED */
757 int
758 raidread(dev, uio, flags)
759 dev_t dev;
760 struct uio *uio;
761 int flags;
762 {
763 int unit = raidunit(dev);
764 struct raid_softc *rs;
765
766 if (unit >= numraid)
767 return (ENXIO);
768 rs = &raid_softc[unit];
769
770 if ((rs->sc_flags & RAIDF_INITED) == 0)
771 return (ENXIO);
772
773 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
774
775 }
776 /* ARGSUSED */
777 int
778 raidwrite(dev, uio, flags)
779 dev_t dev;
780 struct uio *uio;
781 int flags;
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785
786 if (unit >= numraid)
787 return (ENXIO);
788 rs = &raid_softc[unit];
789
790 if ((rs->sc_flags & RAIDF_INITED) == 0)
791 return (ENXIO);
792
793 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
794
795 }
796
797 int
798 raidioctl(dev, cmd, data, flag, p)
799 dev_t dev;
800 u_long cmd;
801 caddr_t data;
802 int flag;
803 struct proc *p;
804 {
805 int unit = raidunit(dev);
806 int error = 0;
807 int part, pmask;
808 struct raid_softc *rs;
809 RF_Config_t *k_cfg, *u_cfg;
810 RF_Raid_t *raidPtr;
811 RF_RaidDisk_t *diskPtr;
812 RF_AccTotals_t *totals;
813 RF_DeviceConfig_t *d_cfg, **ucfgp;
814 u_char *specific_buf;
815 int retcode = 0;
816 int column;
817 int raidid;
818 struct rf_recon_req *rrcopy, *rr;
819 RF_ComponentLabel_t *clabel;
820 RF_ComponentLabel_t ci_label;
821 RF_ComponentLabel_t **clabel_ptr;
822 RF_SingleComponent_t *sparePtr,*componentPtr;
823 RF_SingleComponent_t hot_spare;
824 RF_SingleComponent_t component;
825 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
826 int i, j, d;
827 #ifdef __HAVE_OLD_DISKLABEL
828 struct disklabel newlabel;
829 #endif
830
831 if (unit >= numraid)
832 return (ENXIO);
833 rs = &raid_softc[unit];
834 raidPtr = raidPtrs[unit];
835
836 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
837 (int) DISKPART(dev), (int) unit, (int) cmd));
838
839 /* Must be open for writes for these commands... */
840 switch (cmd) {
841 case DIOCSDINFO:
842 case DIOCWDINFO:
843 #ifdef __HAVE_OLD_DISKLABEL
844 case ODIOCWDINFO:
845 case ODIOCSDINFO:
846 #endif
847 case DIOCWLABEL:
848 if ((flag & FWRITE) == 0)
849 return (EBADF);
850 }
851
852 /* Must be initialized for these... */
853 switch (cmd) {
854 case DIOCGDINFO:
855 case DIOCSDINFO:
856 case DIOCWDINFO:
857 #ifdef __HAVE_OLD_DISKLABEL
858 case ODIOCGDINFO:
859 case ODIOCWDINFO:
860 case ODIOCSDINFO:
861 case ODIOCGDEFLABEL:
862 #endif
863 case DIOCGPART:
864 case DIOCWLABEL:
865 case DIOCGDEFLABEL:
866 case RAIDFRAME_SHUTDOWN:
867 case RAIDFRAME_REWRITEPARITY:
868 case RAIDFRAME_GET_INFO:
869 case RAIDFRAME_RESET_ACCTOTALS:
870 case RAIDFRAME_GET_ACCTOTALS:
871 case RAIDFRAME_KEEP_ACCTOTALS:
872 case RAIDFRAME_GET_SIZE:
873 case RAIDFRAME_FAIL_DISK:
874 case RAIDFRAME_COPYBACK:
875 case RAIDFRAME_CHECK_RECON_STATUS:
876 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
877 case RAIDFRAME_GET_COMPONENT_LABEL:
878 case RAIDFRAME_SET_COMPONENT_LABEL:
879 case RAIDFRAME_ADD_HOT_SPARE:
880 case RAIDFRAME_REMOVE_HOT_SPARE:
881 case RAIDFRAME_INIT_LABELS:
882 case RAIDFRAME_REBUILD_IN_PLACE:
883 case RAIDFRAME_CHECK_PARITY:
884 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
885 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
886 case RAIDFRAME_CHECK_COPYBACK_STATUS:
887 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
888 case RAIDFRAME_SET_AUTOCONFIG:
889 case RAIDFRAME_SET_ROOT:
890 case RAIDFRAME_DELETE_COMPONENT:
891 case RAIDFRAME_INCORPORATE_HOT_SPARE:
892 if ((rs->sc_flags & RAIDF_INITED) == 0)
893 return (ENXIO);
894 }
895
896 switch (cmd) {
897
898 /* configure the system */
899 case RAIDFRAME_CONFIGURE:
900
901 if (raidPtr->valid) {
902 /* There is a valid RAID set running on this unit! */
903 printf("raid%d: Device already configured!\n",unit);
904 return(EINVAL);
905 }
906
907 /* copy-in the configuration information */
908 /* data points to a pointer to the configuration structure */
909
910 u_cfg = *((RF_Config_t **) data);
911 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
912 if (k_cfg == NULL) {
913 return (ENOMEM);
914 }
915 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
916 if (retcode) {
917 RF_Free(k_cfg, sizeof(RF_Config_t));
918 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
919 retcode));
920 return (retcode);
921 }
922 /* allocate a buffer for the layout-specific data, and copy it
923 * in */
924 if (k_cfg->layoutSpecificSize) {
925 if (k_cfg->layoutSpecificSize > 10000) {
926 /* sanity check */
927 RF_Free(k_cfg, sizeof(RF_Config_t));
928 return (EINVAL);
929 }
930 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
931 (u_char *));
932 if (specific_buf == NULL) {
933 RF_Free(k_cfg, sizeof(RF_Config_t));
934 return (ENOMEM);
935 }
936 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
937 k_cfg->layoutSpecificSize);
938 if (retcode) {
939 RF_Free(k_cfg, sizeof(RF_Config_t));
940 RF_Free(specific_buf,
941 k_cfg->layoutSpecificSize);
942 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
943 retcode));
944 return (retcode);
945 }
946 } else
947 specific_buf = NULL;
948 k_cfg->layoutSpecific = specific_buf;
949
950 /* should do some kind of sanity check on the configuration.
951 * Store the sum of all the bytes in the last byte? */
952
953 /* configure the system */
954
955 /*
956 * Clear the entire RAID descriptor, just to make sure
957 * there is no stale data left in the case of a
958 * reconfiguration
959 */
960 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
961 raidPtr->raidid = unit;
962
963 retcode = rf_Configure(raidPtr, k_cfg, NULL);
964
965 if (retcode == 0) {
966
967 /* allow this many simultaneous IO's to
968 this RAID device */
969 raidPtr->openings = RAIDOUTSTANDING;
970
971 raidinit(raidPtr);
972 rf_markalldirty(raidPtr);
973 }
974 /* free the buffers. No return code here. */
975 if (k_cfg->layoutSpecificSize) {
976 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
977 }
978 RF_Free(k_cfg, sizeof(RF_Config_t));
979
980 return (retcode);
981
982 /* shutdown the system */
983 case RAIDFRAME_SHUTDOWN:
984
985 if ((error = raidlock(rs)) != 0)
986 return (error);
987
988 /*
989 * If somebody has a partition mounted, we shouldn't
990 * shutdown.
991 */
992
993 part = DISKPART(dev);
994 pmask = (1 << part);
995 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
996 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
997 (rs->sc_dkdev.dk_copenmask & pmask))) {
998 raidunlock(rs);
999 return (EBUSY);
1000 }
1001
1002 retcode = rf_Shutdown(raidPtr);
1003
1004 /* It's no longer initialized... */
1005 rs->sc_flags &= ~RAIDF_INITED;
1006
1007 /* Detach the disk. */
1008 disk_detach(&rs->sc_dkdev);
1009
1010 raidunlock(rs);
1011
1012 return (retcode);
1013 case RAIDFRAME_GET_COMPONENT_LABEL:
1014 clabel_ptr = (RF_ComponentLabel_t **) data;
1015 /* need to read the component label for the disk indicated
1016 by row,column in clabel */
1017
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1020 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1021 (RF_ComponentLabel_t *));
1022 if (clabel == NULL)
1023 return (ENOMEM);
1024
1025 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1026
1027 retcode = copyin( *clabel_ptr, clabel,
1028 sizeof(RF_ComponentLabel_t));
1029
1030 if (retcode) {
1031 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1032 return(retcode);
1033 }
1034
1035 clabel->row = 0; /* Don't allow looking at anything else.*/
1036
1037 column = clabel->column;
1038
1039 if ((column < 0) || (column >= raidPtr->numCol +
1040 raidPtr->numSpare)) {
1041 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1042 return(EINVAL);
1043 }
1044
1045 raidread_component_label(raidPtr->Disks[column].dev,
1046 raidPtr->raid_cinfo[column].ci_vp,
1047 clabel );
1048
1049 retcode = copyout(clabel, *clabel_ptr,
1050 sizeof(RF_ComponentLabel_t));
1051 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1052 return (retcode);
1053
1054 case RAIDFRAME_SET_COMPONENT_LABEL:
1055 clabel = (RF_ComponentLabel_t *) data;
1056
1057 /* XXX check the label for valid stuff... */
1058 /* Note that some things *should not* get modified --
1059 the user should be re-initing the labels instead of
1060 trying to patch things.
1061 */
1062
1063 raidid = raidPtr->raidid;
1064 printf("raid%d: Got component label:\n", raidid);
1065 printf("raid%d: Version: %d\n", raidid, clabel->version);
1066 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1067 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1068 printf("raid%d: Column: %d\n", raidid, clabel->column);
1069 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1070 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1071 printf("raid%d: Status: %d\n", raidid, clabel->status);
1072
1073 clabel->row = 0;
1074 column = clabel->column;
1075
1076 if ((column < 0) || (column >= raidPtr->numCol)) {
1077 return(EINVAL);
1078 }
1079
1080 /* XXX this isn't allowed to do anything for now :-) */
1081
1082 /* XXX and before it is, we need to fill in the rest
1083 of the fields!?!?!?! */
1084 #if 0
1085 raidwrite_component_label(
1086 raidPtr->Disks[column].dev,
1087 raidPtr->raid_cinfo[column].ci_vp,
1088 clabel );
1089 #endif
1090 return (0);
1091
1092 case RAIDFRAME_INIT_LABELS:
1093 clabel = (RF_ComponentLabel_t *) data;
1094 /*
1095 we only want the serial number from
1096 the above. We get all the rest of the information
1097 from the config that was used to create this RAID
1098 set.
1099 */
1100
1101 raidPtr->serial_number = clabel->serial_number;
1102
1103 raid_init_component_label(raidPtr, &ci_label);
1104 ci_label.serial_number = clabel->serial_number;
1105 ci_label.row = 0; /* we dont' pretend to support more */
1106
1107 for(column=0;column<raidPtr->numCol;column++) {
1108 diskPtr = &raidPtr->Disks[column];
1109 if (!RF_DEAD_DISK(diskPtr->status)) {
1110 ci_label.partitionSize = diskPtr->partitionSize;
1111 ci_label.column = column;
1112 raidwrite_component_label(
1113 raidPtr->Disks[column].dev,
1114 raidPtr->raid_cinfo[column].ci_vp,
1115 &ci_label );
1116 }
1117 }
1118
1119 return (retcode);
1120 case RAIDFRAME_SET_AUTOCONFIG:
1121 d = rf_set_autoconfig(raidPtr, *(int *) data);
1122 printf("raid%d: New autoconfig value is: %d\n",
1123 raidPtr->raidid, d);
1124 *(int *) data = d;
1125 return (retcode);
1126
1127 case RAIDFRAME_SET_ROOT:
1128 d = rf_set_rootpartition(raidPtr, *(int *) data);
1129 printf("raid%d: New rootpartition value is: %d\n",
1130 raidPtr->raidid, d);
1131 *(int *) data = d;
1132 return (retcode);
1133
1134 /* initialize all parity */
1135 case RAIDFRAME_REWRITEPARITY:
1136
1137 if (raidPtr->Layout.map->faultsTolerated == 0) {
1138 /* Parity for RAID 0 is trivially correct */
1139 raidPtr->parity_good = RF_RAID_CLEAN;
1140 return(0);
1141 }
1142
1143 if (raidPtr->parity_rewrite_in_progress == 1) {
1144 /* Re-write is already in progress! */
1145 return(EINVAL);
1146 }
1147
1148 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1149 rf_RewriteParityThread,
1150 raidPtr,"raid_parity");
1151 return (retcode);
1152
1153
1154 case RAIDFRAME_ADD_HOT_SPARE:
1155 sparePtr = (RF_SingleComponent_t *) data;
1156 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1157 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1158 return(retcode);
1159
1160 case RAIDFRAME_REMOVE_HOT_SPARE:
1161 return(retcode);
1162
1163 case RAIDFRAME_DELETE_COMPONENT:
1164 componentPtr = (RF_SingleComponent_t *)data;
1165 memcpy( &component, componentPtr,
1166 sizeof(RF_SingleComponent_t));
1167 retcode = rf_delete_component(raidPtr, &component);
1168 return(retcode);
1169
1170 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1171 componentPtr = (RF_SingleComponent_t *)data;
1172 memcpy( &component, componentPtr,
1173 sizeof(RF_SingleComponent_t));
1174 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1175 return(retcode);
1176
1177 case RAIDFRAME_REBUILD_IN_PLACE:
1178
1179 if (raidPtr->Layout.map->faultsTolerated == 0) {
1180 /* Can't do this on a RAID 0!! */
1181 return(EINVAL);
1182 }
1183
1184 if (raidPtr->recon_in_progress == 1) {
1185 /* a reconstruct is already in progress! */
1186 return(EINVAL);
1187 }
1188
1189 componentPtr = (RF_SingleComponent_t *) data;
1190 memcpy( &component, componentPtr,
1191 sizeof(RF_SingleComponent_t));
1192 component.row = 0; /* we don't support any more */
1193 column = component.column;
1194
1195 if ((column < 0) || (column >= raidPtr->numCol)) {
1196 return(EINVAL);
1197 }
1198
1199 RF_LOCK_MUTEX(raidPtr->mutex);
1200 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1201 (raidPtr->numFailures > 0)) {
1202 /* XXX 0 above shouldn't be constant!!! */
1203 /* some component other than this has failed.
1204 Let's not make things worse than they already
1205 are... */
1206 printf("raid%d: Unable to reconstruct to disk at:\n",
1207 raidPtr->raidid);
1208 printf("raid%d: Col: %d Too many failures.\n",
1209 raidPtr->raidid, column);
1210 RF_UNLOCK_MUTEX(raidPtr->mutex);
1211 return (EINVAL);
1212 }
1213 if (raidPtr->Disks[column].status ==
1214 rf_ds_reconstructing) {
1215 printf("raid%d: Unable to reconstruct to disk at:\n",
1216 raidPtr->raidid);
1217 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1218
1219 RF_UNLOCK_MUTEX(raidPtr->mutex);
1220 return (EINVAL);
1221 }
1222 if (raidPtr->Disks[column].status == rf_ds_spared) {
1223 RF_UNLOCK_MUTEX(raidPtr->mutex);
1224 return (EINVAL);
1225 }
1226 RF_UNLOCK_MUTEX(raidPtr->mutex);
1227
1228 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1229 if (rrcopy == NULL)
1230 return(ENOMEM);
1231
1232 rrcopy->raidPtr = (void *) raidPtr;
1233 rrcopy->col = column;
1234
1235 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1236 rf_ReconstructInPlaceThread,
1237 rrcopy,"raid_reconip");
1238 return(retcode);
1239
1240 case RAIDFRAME_GET_INFO:
1241 if (!raidPtr->valid)
1242 return (ENODEV);
1243 ucfgp = (RF_DeviceConfig_t **) data;
1244 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1245 (RF_DeviceConfig_t *));
1246 if (d_cfg == NULL)
1247 return (ENOMEM);
1248 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1249 d_cfg->rows = 1; /* there is only 1 row now */
1250 d_cfg->cols = raidPtr->numCol;
1251 d_cfg->ndevs = raidPtr->numCol;
1252 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1253 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1254 return (ENOMEM);
1255 }
1256 d_cfg->nspares = raidPtr->numSpare;
1257 if (d_cfg->nspares >= RF_MAX_DISKS) {
1258 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1259 return (ENOMEM);
1260 }
1261 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1262 d = 0;
1263 for (j = 0; j < d_cfg->cols; j++) {
1264 d_cfg->devs[d] = raidPtr->Disks[j];
1265 d++;
1266 }
1267 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1268 d_cfg->spares[i] = raidPtr->Disks[j];
1269 }
1270 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1271 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1272
1273 return (retcode);
1274
1275 case RAIDFRAME_CHECK_PARITY:
1276 *(int *) data = raidPtr->parity_good;
1277 return (0);
1278
1279 case RAIDFRAME_RESET_ACCTOTALS:
1280 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1281 return (0);
1282
1283 case RAIDFRAME_GET_ACCTOTALS:
1284 totals = (RF_AccTotals_t *) data;
1285 *totals = raidPtr->acc_totals;
1286 return (0);
1287
1288 case RAIDFRAME_KEEP_ACCTOTALS:
1289 raidPtr->keep_acc_totals = *(int *)data;
1290 return (0);
1291
1292 case RAIDFRAME_GET_SIZE:
1293 *(int *) data = raidPtr->totalSectors;
1294 return (0);
1295
1296 /* fail a disk & optionally start reconstruction */
1297 case RAIDFRAME_FAIL_DISK:
1298
1299 if (raidPtr->Layout.map->faultsTolerated == 0) {
1300 /* Can't do this on a RAID 0!! */
1301 return(EINVAL);
1302 }
1303
1304 rr = (struct rf_recon_req *) data;
1305 rr->row = 0;
1306 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1307 return (EINVAL);
1308
1309
1310 RF_LOCK_MUTEX(raidPtr->mutex);
1311 if ((raidPtr->Disks[rr->col].status ==
1312 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1313 /* some other component has failed. Let's not make
1314 things worse. XXX wrong for RAID6 */
1315 RF_UNLOCK_MUTEX(raidPtr->mutex);
1316 return (EINVAL);
1317 }
1318 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1319 /* Can't fail a spared disk! */
1320 RF_UNLOCK_MUTEX(raidPtr->mutex);
1321 return (EINVAL);
1322 }
1323 RF_UNLOCK_MUTEX(raidPtr->mutex);
1324
1325 /* make a copy of the recon request so that we don't rely on
1326 * the user's buffer */
1327 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1328 if (rrcopy == NULL)
1329 return(ENOMEM);
1330 memcpy(rrcopy, rr, sizeof(*rr));
1331 rrcopy->raidPtr = (void *) raidPtr;
1332
1333 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1334 rf_ReconThread,
1335 rrcopy,"raid_recon");
1336 return (0);
1337
1338 /* invoke a copyback operation after recon on whatever disk
1339 * needs it, if any */
1340 case RAIDFRAME_COPYBACK:
1341
1342 if (raidPtr->Layout.map->faultsTolerated == 0) {
1343 /* This makes no sense on a RAID 0!! */
1344 return(EINVAL);
1345 }
1346
1347 if (raidPtr->copyback_in_progress == 1) {
1348 /* Copyback is already in progress! */
1349 return(EINVAL);
1350 }
1351
1352 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1353 rf_CopybackThread,
1354 raidPtr,"raid_copyback");
1355 return (retcode);
1356
1357 /* return the percentage completion of reconstruction */
1358 case RAIDFRAME_CHECK_RECON_STATUS:
1359 if (raidPtr->Layout.map->faultsTolerated == 0) {
1360 /* This makes no sense on a RAID 0, so tell the
1361 user it's done. */
1362 *(int *) data = 100;
1363 return(0);
1364 }
1365 if (raidPtr->status != rf_rs_reconstructing)
1366 *(int *) data = 100;
1367 else
1368 *(int *) data = raidPtr->reconControl->percentComplete;
1369 return (0);
1370 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1371 progressInfoPtr = (RF_ProgressInfo_t **) data;
1372 if (raidPtr->status != rf_rs_reconstructing) {
1373 progressInfo.remaining = 0;
1374 progressInfo.completed = 100;
1375 progressInfo.total = 100;
1376 } else {
1377 progressInfo.total =
1378 raidPtr->reconControl->numRUsTotal;
1379 progressInfo.completed =
1380 raidPtr->reconControl->numRUsComplete;
1381 progressInfo.remaining = progressInfo.total -
1382 progressInfo.completed;
1383 }
1384 retcode = copyout(&progressInfo, *progressInfoPtr,
1385 sizeof(RF_ProgressInfo_t));
1386 return (retcode);
1387
1388 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1389 if (raidPtr->Layout.map->faultsTolerated == 0) {
1390 /* This makes no sense on a RAID 0, so tell the
1391 user it's done. */
1392 *(int *) data = 100;
1393 return(0);
1394 }
1395 if (raidPtr->parity_rewrite_in_progress == 1) {
1396 *(int *) data = 100 *
1397 raidPtr->parity_rewrite_stripes_done /
1398 raidPtr->Layout.numStripe;
1399 } else {
1400 *(int *) data = 100;
1401 }
1402 return (0);
1403
1404 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1405 progressInfoPtr = (RF_ProgressInfo_t **) data;
1406 if (raidPtr->parity_rewrite_in_progress == 1) {
1407 progressInfo.total = raidPtr->Layout.numStripe;
1408 progressInfo.completed =
1409 raidPtr->parity_rewrite_stripes_done;
1410 progressInfo.remaining = progressInfo.total -
1411 progressInfo.completed;
1412 } else {
1413 progressInfo.remaining = 0;
1414 progressInfo.completed = 100;
1415 progressInfo.total = 100;
1416 }
1417 retcode = copyout(&progressInfo, *progressInfoPtr,
1418 sizeof(RF_ProgressInfo_t));
1419 return (retcode);
1420
1421 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1422 if (raidPtr->Layout.map->faultsTolerated == 0) {
1423 /* This makes no sense on a RAID 0 */
1424 *(int *) data = 100;
1425 return(0);
1426 }
1427 if (raidPtr->copyback_in_progress == 1) {
1428 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1429 raidPtr->Layout.numStripe;
1430 } else {
1431 *(int *) data = 100;
1432 }
1433 return (0);
1434
1435 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1436 progressInfoPtr = (RF_ProgressInfo_t **) data;
1437 if (raidPtr->copyback_in_progress == 1) {
1438 progressInfo.total = raidPtr->Layout.numStripe;
1439 progressInfo.completed =
1440 raidPtr->copyback_stripes_done;
1441 progressInfo.remaining = progressInfo.total -
1442 progressInfo.completed;
1443 } else {
1444 progressInfo.remaining = 0;
1445 progressInfo.completed = 100;
1446 progressInfo.total = 100;
1447 }
1448 retcode = copyout(&progressInfo, *progressInfoPtr,
1449 sizeof(RF_ProgressInfo_t));
1450 return (retcode);
1451
1452 /* the sparetable daemon calls this to wait for the kernel to
1453 * need a spare table. this ioctl does not return until a
1454 * spare table is needed. XXX -- calling mpsleep here in the
1455 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1456 * -- I should either compute the spare table in the kernel,
1457 * or have a different -- XXX XXX -- interface (a different
1458 * character device) for delivering the table -- XXX */
1459 #if 0
1460 case RAIDFRAME_SPARET_WAIT:
1461 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1462 while (!rf_sparet_wait_queue)
1463 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1464 waitreq = rf_sparet_wait_queue;
1465 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1466 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1467
1468 /* structure assignment */
1469 *((RF_SparetWait_t *) data) = *waitreq;
1470
1471 RF_Free(waitreq, sizeof(*waitreq));
1472 return (0);
1473
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1476 case RAIDFRAME_ABORT_SPARET_WAIT:
1477 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1478 waitreq->fcol = -1;
1479 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1480 waitreq->next = rf_sparet_wait_queue;
1481 rf_sparet_wait_queue = waitreq;
1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1483 wakeup(&rf_sparet_wait_queue);
1484 return (0);
1485
1486 /* used by the spare table daemon to deliver a spare table
1487 * into the kernel */
1488 case RAIDFRAME_SEND_SPARET:
1489
1490 /* install the spare table */
1491 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1492
1493 /* respond to the requestor. the return status of the spare
1494 * table installation is passed in the "fcol" field */
1495 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1496 waitreq->fcol = retcode;
1497 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1498 waitreq->next = rf_sparet_resp_queue;
1499 rf_sparet_resp_queue = waitreq;
1500 wakeup(&rf_sparet_resp_queue);
1501 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1502
1503 return (retcode);
1504 #endif
1505
1506 default:
1507 break; /* fall through to the os-specific code below */
1508
1509 }
1510
1511 if (!raidPtr->valid)
1512 return (EINVAL);
1513
1514 /*
1515 * Add support for "regular" device ioctls here.
1516 */
1517
1518 switch (cmd) {
1519 case DIOCGDINFO:
1520 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1521 break;
1522 #ifdef __HAVE_OLD_DISKLABEL
1523 case ODIOCGDINFO:
1524 newlabel = *(rs->sc_dkdev.dk_label);
1525 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1526 return ENOTTY;
1527 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1528 break;
1529 #endif
1530
1531 case DIOCGPART:
1532 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1533 ((struct partinfo *) data)->part =
1534 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1535 break;
1536
1537 case DIOCWDINFO:
1538 case DIOCSDINFO:
1539 #ifdef __HAVE_OLD_DISKLABEL
1540 case ODIOCWDINFO:
1541 case ODIOCSDINFO:
1542 #endif
1543 {
1544 struct disklabel *lp;
1545 #ifdef __HAVE_OLD_DISKLABEL
1546 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1547 memset(&newlabel, 0, sizeof newlabel);
1548 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1549 lp = &newlabel;
1550 } else
1551 #endif
1552 lp = (struct disklabel *)data;
1553
1554 if ((error = raidlock(rs)) != 0)
1555 return (error);
1556
1557 rs->sc_flags |= RAIDF_LABELLING;
1558
1559 error = setdisklabel(rs->sc_dkdev.dk_label,
1560 lp, 0, rs->sc_dkdev.dk_cpulabel);
1561 if (error == 0) {
1562 if (cmd == DIOCWDINFO
1563 #ifdef __HAVE_OLD_DISKLABEL
1564 || cmd == ODIOCWDINFO
1565 #endif
1566 )
1567 error = writedisklabel(RAIDLABELDEV(dev),
1568 raidstrategy, rs->sc_dkdev.dk_label,
1569 rs->sc_dkdev.dk_cpulabel);
1570 }
1571 rs->sc_flags &= ~RAIDF_LABELLING;
1572
1573 raidunlock(rs);
1574
1575 if (error)
1576 return (error);
1577 break;
1578 }
1579
1580 case DIOCWLABEL:
1581 if (*(int *) data != 0)
1582 rs->sc_flags |= RAIDF_WLABEL;
1583 else
1584 rs->sc_flags &= ~RAIDF_WLABEL;
1585 break;
1586
1587 case DIOCGDEFLABEL:
1588 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1589 break;
1590
1591 #ifdef __HAVE_OLD_DISKLABEL
1592 case ODIOCGDEFLABEL:
1593 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1594 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1595 return ENOTTY;
1596 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1597 break;
1598 #endif
1599
1600 default:
1601 retcode = ENOTTY;
1602 }
1603 return (retcode);
1604
1605 }
1606
1607
1608 /* raidinit -- complete the rest of the initialization for the
1609 RAIDframe device. */
1610
1611
1612 static void
1613 raidinit(raidPtr)
1614 RF_Raid_t *raidPtr;
1615 {
1616 struct raid_softc *rs;
1617 int unit;
1618
1619 unit = raidPtr->raidid;
1620
1621 rs = &raid_softc[unit];
1622
1623 /* XXX should check return code first... */
1624 rs->sc_flags |= RAIDF_INITED;
1625
1626 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1627
1628 rs->sc_dkdev.dk_name = rs->sc_xname;
1629
1630 /* disk_attach actually creates space for the CPU disklabel, among
1631 * other things, so it's critical to call this *BEFORE* we try putzing
1632 * with disklabels. */
1633
1634 disk_attach(&rs->sc_dkdev);
1635
1636 /* XXX There may be a weird interaction here between this, and
1637 * protectedSectors, as used in RAIDframe. */
1638
1639 rs->sc_size = raidPtr->totalSectors;
1640
1641 }
1642 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1643 /* wake up the daemon & tell it to get us a spare table
1644 * XXX
1645 * the entries in the queues should be tagged with the raidPtr
1646 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1648 * XXX
1649 *
1650 * XXX This code is not currently used. GO
1651 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Hand the request to the user-level sparetable daemon: push it
	 * onto rf_sparet_wait_queue and wake whoever is blocked in the
	 * RAIDFRAME_SPARET_WAIT ioctl. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): the old comment said "mpsleep unlocks the mutex",
	 * but tsleep() does not release rf_sparet_wait_mutex, so this
	 * appears to sleep with the mutex held -- confirm.  (This code is
	 * not currently compiled in; see the XXX above.) */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the reply queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1677 #endif
1678
1679 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1680 * bp & passes it down.
1681 * any calls originating in the kernel must use non-blocking I/O
1682 * do some extra sanity checking to return "appropriate" error values for
1683 * certain conditions (to make some standard utilities work)
1684 *
1685 * Formerly known as: rf_DoAccessKernel
1686 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* the mutex is dropped across the label update --
		 * presumably because it can block; confirm */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Invariant: raidPtr->mutex is held when the loop condition is
	 * evaluated, and released for the body's real work. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb flags a request whose byte count is not an exact
		 * multiple of the sector size */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests that run off the end of the device; the
		 * (sum < ...) comparisons also catch unsigned wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject requests whose length is not sector-aligned */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request -- presumably
		 * returned when the I/O completes; confirm */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (bp->b_error) {
			bp->b_flags |= B_ERROR;
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1806
1807
1808
1809
1810 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1811
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/* map the RAIDframe I/O type onto a buf read/write flag */
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	/* record which queue this request came from; the completion
	 * routine needs it */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		    numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left in the buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* PR_NOWAIT: fail the request with ENOMEM rather than sleep if
	 * the wrapper-buf pool is exhausted */
	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
	if (raidbp == NULL) {
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately; no physical I/O is issued */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the component-level buf; InitBP arranges for
		 * KernelWakeupFunc to run when the I/O completes */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* recover our per-I/O context: we were handed &raidbp->rf_buf,
	 * and rf_buf is (presumably) the first member of struct raidbuf
	 * -- confirm against the struct declaration */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original (upper-level) buf this component I/O serviced */
	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any error from the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* accumulate physical-I/O timing statistics */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* done with the wrapper buf; bp and req stay valid after this */
	pool_put(&raidframe_cbufpool, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1999
2000
2001
2002 /*
2003 * initialize a buf structure for doing an I/O in the kernel.
2004 */
static void
InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
    logBytesPerSector, b_proc)
	struct buf *bp;
	struct vnode *b_vp;
	unsigned rw_flag;
	dev_t dev;
	RF_SectorNum_t startSect;
	RF_SectorCount_t numSect;
	caddr_t buf;
	void (*cbFunc) (struct buf *);
	void *cbArg;
	int logBytesPerSector;
	struct proc *b_proc;
{
	/* NOTE(review): cbArg is accepted but never stored; the completion
	 * routine recovers its context by casting bp back to the enclosing
	 * struct raidbuf instead. */
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = buf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* a zero-length I/O indicates a bug somewhere upstream */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	/* biodone() invokes cbFunc on completion (B_CALL is set above) */
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
2037
2038 static void
2039 raidgetdefaultlabel(raidPtr, rs, lp)
2040 RF_Raid_t *raidPtr;
2041 struct raid_softc *rs;
2042 struct disklabel *lp;
2043 {
2044 memset(lp, 0, sizeof(*lp));
2045
2046 /* fabricate a label... */
2047 lp->d_secperunit = raidPtr->totalSectors;
2048 lp->d_secsize = raidPtr->bytesPerSector;
2049 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2050 lp->d_ntracks = 4 * raidPtr->numCol;
2051 lp->d_ncylinders = raidPtr->totalSectors /
2052 (lp->d_nsectors * lp->d_ntracks);
2053 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2054
2055 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2056 lp->d_type = DTYPE_RAID;
2057 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2058 lp->d_rpm = 3600;
2059 lp->d_interleave = 1;
2060 lp->d_flags = 0;
2061
2062 lp->d_partitions[RAW_PART].p_offset = 0;
2063 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2064 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2065 lp->d_npartitions = RAW_PART + 1;
2066
2067 lp->d_magic = DISKMAGIC;
2068 lp->d_magic2 = DISKMAGIC;
2069 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2070
2071 }
2072 /*
2073 * Read the disklabel from the raid device. If one is not present, fake one
2074 * up.
2075 */
2076 static void
2077 raidgetdisklabel(dev)
2078 dev_t dev;
2079 {
2080 int unit = raidunit(dev);
2081 struct raid_softc *rs = &raid_softc[unit];
2082 const char *errstring;
2083 struct disklabel *lp = rs->sc_dkdev.dk_label;
2084 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2085 RF_Raid_t *raidPtr;
2086
2087 db1_printf(("Getting the disklabel...\n"));
2088
2089 memset(clp, 0, sizeof(*clp));
2090
2091 raidPtr = raidPtrs[unit];
2092
2093 raidgetdefaultlabel(raidPtr, rs, lp);
2094
2095 /*
2096 * Call the generic disklabel extraction routine.
2097 */
2098 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2099 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2100 if (errstring)
2101 raidmakedisklabel(rs);
2102 else {
2103 int i;
2104 struct partition *pp;
2105
2106 /*
2107 * Sanity check whether the found disklabel is valid.
2108 *
2109 * This is necessary since total size of the raid device
2110 * may vary when an interleave is changed even though exactly
2111 * same componets are used, and old disklabel may used
2112 * if that is found.
2113 */
2114 if (lp->d_secperunit != rs->sc_size)
2115 printf("raid%d: WARNING: %s: "
2116 "total sector size in disklabel (%d) != "
2117 "the size of raid (%ld)\n", unit, rs->sc_xname,
2118 lp->d_secperunit, (long) rs->sc_size);
2119 for (i = 0; i < lp->d_npartitions; i++) {
2120 pp = &lp->d_partitions[i];
2121 if (pp->p_offset + pp->p_size > rs->sc_size)
2122 printf("raid%d: WARNING: %s: end of partition `%c' "
2123 "exceeds the size of raid (%ld)\n",
2124 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2125 }
2126 }
2127
2128 }
2129 /*
2130 * Take care of things one might want to take care of in the event
2131 * that a disklabel isn't present.
2132 */
2133 static void
2134 raidmakedisklabel(rs)
2135 struct raid_softc *rs;
2136 {
2137 struct disklabel *lp = rs->sc_dkdev.dk_label;
2138 db1_printf(("Making a label..\n"));
2139
2140 /*
2141 * For historical reasons, if there's no disklabel present
2142 * the raw partition must be marked FS_BSDFFS.
2143 */
2144
2145 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2146
2147 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2148
2149 lp->d_checksum = dkcksum(lp);
2150 }
2151 /*
2152 * Lookup the provided name in the filesystem. If the file exists,
2153 * is a valid block device, and isn't being used by anyone else,
2154 * set *vpp to the file's vnode.
2155 * You'll find the original of this in ccd.c
2156 */
2157 int
2158 raidlookup(path, p, vpp)
2159 char *path;
2160 struct proc *p;
2161 struct vnode **vpp; /* result */
2162 {
2163 struct nameidata nd;
2164 struct vnode *vp;
2165 struct vattr va;
2166 int error;
2167
2168 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2169 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2170 return (error);
2171 }
2172 vp = nd.ni_vp;
2173 if (vp->v_usecount > 1) {
2174 VOP_UNLOCK(vp, 0);
2175 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2176 return (EBUSY);
2177 }
2178 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2179 VOP_UNLOCK(vp, 0);
2180 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2181 return (error);
2182 }
2183 /* XXX: eventually we should handle VREG, too. */
2184 if (va.va_type != VBLK) {
2185 VOP_UNLOCK(vp, 0);
2186 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2187 return (ENOTBLK);
2188 }
2189 VOP_UNLOCK(vp, 0);
2190 *vpp = vp;
2191 return (0);
2192 }
2193 /*
2194 * Wait interruptibly for an exclusive lock.
2195 *
2196 * XXX
2197 * Several drivers do this; it should be abstracted and made MP-safe.
2198 * (Hmm... where have we seen this warning before :-> GO )
2199 */
2200 static int
2201 raidlock(rs)
2202 struct raid_softc *rs;
2203 {
2204 int error;
2205
2206 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2207 rs->sc_flags |= RAIDF_WANTED;
2208 if ((error =
2209 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2210 return (error);
2211 }
2212 rs->sc_flags |= RAIDF_LOCKED;
2213 return (0);
2214 }
2215 /*
2216 * Unlock and wake up any waiters.
2217 */
2218 static void
2219 raidunlock(rs)
2220 struct raid_softc *rs;
2221 {
2222
2223 rs->sc_flags &= ~RAIDF_LOCKED;
2224 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2225 rs->sc_flags &= ~RAIDF_WANTED;
2226 wakeup(rs);
2227 }
2228 }
2229
2230
2231 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2232 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2233
2234 int
2235 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2236 {
2237 RF_ComponentLabel_t clabel;
2238 raidread_component_label(dev, b_vp, &clabel);
2239 clabel.mod_counter = mod_counter;
2240 clabel.clean = RF_RAID_CLEAN;
2241 raidwrite_component_label(dev, b_vp, &clabel);
2242 return(0);
2243 }
2244
2245
2246 int
2247 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2248 {
2249 RF_ComponentLabel_t clabel;
2250 raidread_component_label(dev, b_vp, &clabel);
2251 clabel.mod_counter = mod_counter;
2252 clabel.clean = RF_RAID_DIRTY;
2253 raidwrite_component_label(dev, b_vp, &clabel);
2254 return(0);
2255 }
2256
2257 /* ARGSUSED */
2258 int
2259 raidread_component_label(dev, b_vp, clabel)
2260 dev_t dev;
2261 struct vnode *b_vp;
2262 RF_ComponentLabel_t *clabel;
2263 {
2264 struct buf *bp;
2265 const struct bdevsw *bdev;
2266 int error;
2267
2268 /* XXX should probably ensure that we don't try to do this if
2269 someone has changed rf_protected_sectors. */
2270
2271 if (b_vp == NULL) {
2272 /* For whatever reason, this component is not valid.
2273 Don't try to read a component label from it. */
2274 return(EINVAL);
2275 }
2276
2277 /* get a block of the appropriate size... */
2278 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2279 bp->b_dev = dev;
2280
2281 /* get our ducks in a row for the read */
2282 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2283 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2284 bp->b_flags |= B_READ;
2285 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2286
2287 bdev = bdevsw_lookup(bp->b_dev);
2288 if (bdev == NULL)
2289 return (ENXIO);
2290 (*bdev->d_strategy)(bp);
2291
2292 error = biowait(bp);
2293
2294 if (!error) {
2295 memcpy(clabel, bp->b_data,
2296 sizeof(RF_ComponentLabel_t));
2297 }
2298
2299 brelse(bp);
2300 return(error);
2301 }
2302 /* ARGSUSED */
2303 int
2304 raidwrite_component_label(dev, b_vp, clabel)
2305 dev_t dev;
2306 struct vnode *b_vp;
2307 RF_ComponentLabel_t *clabel;
2308 {
2309 struct buf *bp;
2310 const struct bdevsw *bdev;
2311 int error;
2312
2313 /* get a block of the appropriate size... */
2314 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2315 bp->b_dev = dev;
2316
2317 /* get our ducks in a row for the write */
2318 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2319 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2320 bp->b_flags |= B_WRITE;
2321 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2322
2323 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2324
2325 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2326
2327 bdev = bdevsw_lookup(bp->b_dev);
2328 if (bdev == NULL)
2329 return (ENXIO);
2330 (*bdev->d_strategy)(bp);
2331 error = biowait(bp);
2332 brelse(bp);
2333 if (error) {
2334 #if 1
2335 printf("Failed to write RAID component info!\n");
2336 #endif
2337 }
2338
2339 return(error);
2340 }
2341
/*
 * Bump the set's modification counter and mark the component label of
 * every live component (and every in-use spare) as dirty.  Called so
 * that an unclean shutdown can be detected later.
 */
void
rf_markalldirty(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* new counter value supersedes whatever is on the components */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				 but whatever you do, don't
				 try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	/* now handle any hot spares that are actually in service */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column points at this spare,
			   scol keeps its previous value (initially -1) and
			   is written into the label below -- confirm that
			   rf_ds_used_spare implies a matching spareCol */

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2413
2414
/*
 * Rewrite the component labels of every optimal component and every
 * in-use spare with the current configuration and a freshly bumped
 * mod_counter.  When final == RF_FINAL_COMPONENT_UPDATE and parity is
 * known good, the components are additionally marked clean.
 */
void
rf_update_component_labels(raidPtr, final)
	RF_Raid_t *raidPtr;
	int final;
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					/* shutdown path: record that parity
					   is known good */
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare now stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matches, scol stays at
			   its previous value (initially -1) -- confirm
			   rf_ds_used_spare implies a matching spareCol */

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2507
/*
 * Close one component's vnode.  Auto-configured components were opened
 * without process credentials (see rf_find_raid_components), so they
 * are closed via VOP_CLOSE with NOCRED; everything else goes through
 * vn_close() using the engine thread's credentials.
 */
void
rf_close_component(raidPtr, vp, auto_configured)
	RF_Raid_t *raidPtr;
	struct vnode *vp;
	int auto_configured;
{
	struct proc *p;

	p = raidPtr->engine_thread;

	if (vp != NULL) {
		if (auto_configured == 1) {
			/* VOP_CLOSE wants the vnode locked; vput()
			   unlocks and releases it */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		}
	}
}
2529
2530
2531 void
2532 rf_UnconfigureVnodes(raidPtr)
2533 RF_Raid_t *raidPtr;
2534 {
2535 int r,c;
2536 struct vnode *vp;
2537 int acd;
2538
2539
2540 /* We take this opportunity to close the vnodes like we should.. */
2541
2542 for (c = 0; c < raidPtr->numCol; c++) {
2543 vp = raidPtr->raid_cinfo[c].ci_vp;
2544 acd = raidPtr->Disks[c].auto_configured;
2545 rf_close_component(raidPtr, vp, acd);
2546 raidPtr->raid_cinfo[c].ci_vp = NULL;
2547 raidPtr->Disks[c].auto_configured = 0;
2548 }
2549
2550 for (r = 0; r < raidPtr->numSpare; r++) {
2551 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2552 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2553 rf_close_component(raidPtr, vp, acd);
2554 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2555 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2556 }
2557 }
2558
2559
/*
 * Kernel-thread body: fail the component named in *req (optionally
 * reconstructing to a spare), free the request, and exit.  Does not
 * return.
 */
void
rf_ReconThread(req)
	struct rf_recon_req *req;
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	/* note that a reconstruction is under way for this set */
	raidPtr->recon_in_progress = 1;

	/* RF_FDFLAGS_RECON requests reconstruction onto a spare */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* XXX get rid of this! we don't need it at all.. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2583
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * set is flagged parity-clean; waiters blocked on shutdown are woken.
 * Does not return.
 */
void
rf_RewriteParityThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2613
2614
/*
 * Kernel-thread body: copy reconstructed data from spares back to
 * their replaced components, then exit.  Does not return.
 */
void
rf_CopybackThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2630
2631
/*
 * Kernel-thread body: reconstruct the component named in *req in
 * place (onto the same column), free the request, and exit.  Does not
 * return.
 */
void
rf_ReconstructInPlaceThread(req)
	struct rf_recon_req *req;
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2650
2651 RF_AutoConfig_t *
2652 rf_find_raid_components()
2653 {
2654 struct vnode *vp;
2655 struct disklabel label;
2656 struct device *dv;
2657 dev_t dev;
2658 int bmajor;
2659 int error;
2660 int i;
2661 int good_one;
2662 RF_ComponentLabel_t *clabel;
2663 RF_AutoConfig_t *ac_list;
2664 RF_AutoConfig_t *ac;
2665
2666
2667 /* initialize the AutoConfig list */
2668 ac_list = NULL;
2669
2670 /* we begin by trolling through *all* the devices on the system */
2671
2672 for (dv = alldevs.tqh_first; dv != NULL;
2673 dv = dv->dv_list.tqe_next) {
2674
2675 /* we are only interested in disks... */
2676 if (dv->dv_class != DV_DISK)
2677 continue;
2678
2679 /* we don't care about floppies... */
2680 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2681 continue;
2682 }
2683
2684 /* we don't care about CD's... */
2685 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2686 continue;
2687 }
2688
2689 /* hdfd is the Atari/Hades floppy driver */
2690 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2691 continue;
2692 }
2693 /* fdisa is the Atari/Milan floppy driver */
2694 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2695 continue;
2696 }
2697
2698 /* need to find the device_name_to_block_device_major stuff */
2699 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2700
2701 /* get a vnode for the raw partition of this disk */
2702
2703 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2704 if (bdevvp(dev, &vp))
2705 panic("RAID can't alloc vnode");
2706
2707 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2708
2709 if (error) {
2710 /* "Who cares." Continue looking
2711 for something that exists*/
2712 vput(vp);
2713 continue;
2714 }
2715
2716 /* Ok, the disk exists. Go get the disklabel. */
2717 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2718 if (error) {
2719 /*
2720 * XXX can't happen - open() would
2721 * have errored out (or faked up one)
2722 */
2723 printf("can't get label for dev %s%c (%d)!?!?\n",
2724 dv->dv_xname, 'a' + RAW_PART, error);
2725 }
2726
2727 /* don't need this any more. We'll allocate it again
2728 a little later if we really do... */
2729 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2730 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2731 vput(vp);
2732
2733 for (i=0; i < label.d_npartitions; i++) {
2734 /* We only support partitions marked as RAID */
2735 if (label.d_partitions[i].p_fstype != FS_RAID)
2736 continue;
2737
2738 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2739 if (bdevvp(dev, &vp))
2740 panic("RAID can't alloc vnode");
2741
2742 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2743 if (error) {
2744 /* Whatever... */
2745 vput(vp);
2746 continue;
2747 }
2748
2749 good_one = 0;
2750
2751 clabel = (RF_ComponentLabel_t *)
2752 malloc(sizeof(RF_ComponentLabel_t),
2753 M_RAIDFRAME, M_NOWAIT);
2754 if (clabel == NULL) {
2755 /* XXX CLEANUP HERE */
2756 printf("RAID auto config: out of memory!\n");
2757 return(NULL); /* XXX probably should panic? */
2758 }
2759
2760 if (!raidread_component_label(dev, vp, clabel)) {
2761 /* Got the label. Does it look reasonable? */
2762 if (rf_reasonable_label(clabel) &&
2763 (clabel->partitionSize <=
2764 label.d_partitions[i].p_size)) {
2765 #if DEBUG
2766 printf("Component on: %s%c: %d\n",
2767 dv->dv_xname, 'a'+i,
2768 label.d_partitions[i].p_size);
2769 rf_print_component_label(clabel);
2770 #endif
2771 /* if it's reasonable, add it,
2772 else ignore it. */
2773 ac = (RF_AutoConfig_t *)
2774 malloc(sizeof(RF_AutoConfig_t),
2775 M_RAIDFRAME,
2776 M_NOWAIT);
2777 if (ac == NULL) {
2778 /* XXX should panic?? */
2779 return(NULL);
2780 }
2781
2782 sprintf(ac->devname, "%s%c",
2783 dv->dv_xname, 'a'+i);
2784 ac->dev = dev;
2785 ac->vp = vp;
2786 ac->clabel = clabel;
2787 ac->next = ac_list;
2788 ac_list = ac;
2789 good_one = 1;
2790 }
2791 }
2792 if (!good_one) {
2793 /* cleanup */
2794 free(clabel, M_RAIDFRAME);
2795 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2796 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2797 vput(vp);
2798 }
2799 }
2800 }
2801 return(ac_list);
2802 }
2803
2804 static int
2805 rf_reasonable_label(clabel)
2806 RF_ComponentLabel_t *clabel;
2807 {
2808
2809 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2810 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2811 ((clabel->clean == RF_RAID_CLEAN) ||
2812 (clabel->clean == RF_RAID_DIRTY)) &&
2813 clabel->row >=0 &&
2814 clabel->column >= 0 &&
2815 clabel->num_rows > 0 &&
2816 clabel->num_columns > 0 &&
2817 clabel->row < clabel->num_rows &&
2818 clabel->column < clabel->num_columns &&
2819 clabel->blockSize > 0 &&
2820 clabel->numBlocks > 0) {
2821 /* label looks reasonable enough... */
2822 return(1);
2823 }
2824 return(0);
2825 }
2826
2827
#if DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2856
2857 RF_ConfigSet_t *
2858 rf_create_auto_sets(ac_list)
2859 RF_AutoConfig_t *ac_list;
2860 {
2861 RF_AutoConfig_t *ac;
2862 RF_ConfigSet_t *config_sets;
2863 RF_ConfigSet_t *cset;
2864 RF_AutoConfig_t *ac_next;
2865
2866
2867 config_sets = NULL;
2868
2869 /* Go through the AutoConfig list, and figure out which components
2870 belong to what sets. */
2871 ac = ac_list;
2872 while(ac!=NULL) {
2873 /* we're going to putz with ac->next, so save it here
2874 for use at the end of the loop */
2875 ac_next = ac->next;
2876
2877 if (config_sets == NULL) {
2878 /* will need at least this one... */
2879 config_sets = (RF_ConfigSet_t *)
2880 malloc(sizeof(RF_ConfigSet_t),
2881 M_RAIDFRAME, M_NOWAIT);
2882 if (config_sets == NULL) {
2883 panic("rf_create_auto_sets: No memory!");
2884 }
2885 /* this one is easy :) */
2886 config_sets->ac = ac;
2887 config_sets->next = NULL;
2888 config_sets->rootable = 0;
2889 ac->next = NULL;
2890 } else {
2891 /* which set does this component fit into? */
2892 cset = config_sets;
2893 while(cset!=NULL) {
2894 if (rf_does_it_fit(cset, ac)) {
2895 /* looks like it matches... */
2896 ac->next = cset->ac;
2897 cset->ac = ac;
2898 break;
2899 }
2900 cset = cset->next;
2901 }
2902 if (cset==NULL) {
2903 /* didn't find a match above... new set..*/
2904 cset = (RF_ConfigSet_t *)
2905 malloc(sizeof(RF_ConfigSet_t),
2906 M_RAIDFRAME, M_NOWAIT);
2907 if (cset == NULL) {
2908 panic("rf_create_auto_sets: No memory!");
2909 }
2910 cset->ac = ac;
2911 ac->next = NULL;
2912 cset->next = config_sets;
2913 cset->rootable = 0;
2914 config_sets = cset;
2915 }
2916 }
2917 ac = ac_next;
2918 }
2919
2920
2921 return(config_sets);
2922 }
2923
2924 static int
2925 rf_does_it_fit(cset, ac)
2926 RF_ConfigSet_t *cset;
2927 RF_AutoConfig_t *ac;
2928 {
2929 RF_ComponentLabel_t *clabel1, *clabel2;
2930
2931 /* If this one matches the *first* one in the set, that's good
2932 enough, since the other members of the set would have been
2933 through here too... */
2934 /* note that we are not checking partitionSize here..
2935
2936 Note that we are also not checking the mod_counters here.
2937 If everything else matches execpt the mod_counter, that's
2938 good enough for this test. We will deal with the mod_counters
2939 a little later in the autoconfiguration process.
2940
2941 (clabel1->mod_counter == clabel2->mod_counter) &&
2942
2943 The reason we don't check for this is that failed disks
2944 will have lower modification counts. If those disks are
2945 not added to the set they used to belong to, then they will
2946 form their own set, which may result in 2 different sets,
2947 for example, competing to be configured at raid0, and
2948 perhaps competing to be the root filesystem set. If the
2949 wrong ones get configured, or both attempt to become /,
2950 weird behaviour and or serious lossage will occur. Thus we
2951 need to bring them into the fold here, and kick them out at
2952 a later point.
2953
2954 */
2955
2956 clabel1 = cset->ac->clabel;
2957 clabel2 = ac->clabel;
2958 if ((clabel1->version == clabel2->version) &&
2959 (clabel1->serial_number == clabel2->serial_number) &&
2960 (clabel1->num_rows == clabel2->num_rows) &&
2961 (clabel1->num_columns == clabel2->num_columns) &&
2962 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2963 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2964 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2965 (clabel1->parityConfig == clabel2->parityConfig) &&
2966 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2967 (clabel1->blockSize == clabel2->blockSize) &&
2968 (clabel1->numBlocks == clabel2->numBlocks) &&
2969 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2970 (clabel1->root_partition == clabel2->root_partition) &&
2971 (clabel1->last_unit == clabel2->last_unit) &&
2972 (clabel1->config_order == clabel2->config_order)) {
2973 /* if it get's here, it almost *has* to be a match */
2974 } else {
2975 /* it's not consistent with somebody in the set..
2976 punt */
2977 return(0);
2978 }
2979 /* all was fine.. it must fit... */
2980 return(1);
2981 }
2982
/*
 * Decide whether the config set has enough live components to be
 * configured.  A component counts as "present" only if its column has
 * a member whose mod_counter equals the set's newest mod_counter.
 * RAID 1 gets special even/odd pair accounting; other levels use a
 * simple missing-component count checked against the level's
 * redundancy.  Returns 1 if configurable, 0 if not.
 */
int
rf_have_enough_components(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the largest mod_counter seen wins: stale (failed) components
	   carry older counters and must not count as present */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (newest mod_counter) member for
		   this column */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a mirror
						   pair are gone -- no data */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3086
/*
 * Build an RF_Config_t for rf_Configure() from the component labels of
 * an autoconfig set.  Layout parameters come from the first label;
 * device names are filled in per-column from each member.
 */
void
rf_create_configuration(ac,config,raidPtr)
	RF_AutoConfig_t *ac;
	RF_Config_t *config;
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	/* note: this assignment also writes through to the label,
	   forcing clabel->num_rows to 1 (single-row arrays only) */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* one device name per member, indexed by its column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3123
3124 int
3125 rf_set_autoconfig(raidPtr, new_value)
3126 RF_Raid_t *raidPtr;
3127 int new_value;
3128 {
3129 RF_ComponentLabel_t clabel;
3130 struct vnode *vp;
3131 dev_t dev;
3132 int column;
3133 int sparecol;
3134
3135 raidPtr->autoconfigure = new_value;
3136
3137 for(column=0; column<raidPtr->numCol; column++) {
3138 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3139 dev = raidPtr->Disks[column].dev;
3140 vp = raidPtr->raid_cinfo[column].ci_vp;
3141 raidread_component_label(dev, vp, &clabel);
3142 clabel.autoconfigure = new_value;
3143 raidwrite_component_label(dev, vp, &clabel);
3144 }
3145 }
3146 for(column = 0; column < raidPtr->numSpare ; column++) {
3147 sparecol = raidPtr->numCol + column;
3148 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3149 dev = raidPtr->Disks[sparecol].dev;
3150 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3151 raidread_component_label(dev, vp, &clabel);
3152 clabel.autoconfigure = new_value;
3153 raidwrite_component_label(dev, vp, &clabel);
3154 }
3155 }
3156 return(new_value);
3157 }
3158
3159 int
3160 rf_set_rootpartition(raidPtr, new_value)
3161 RF_Raid_t *raidPtr;
3162 int new_value;
3163 {
3164 RF_ComponentLabel_t clabel;
3165 struct vnode *vp;
3166 dev_t dev;
3167 int column;
3168 int sparecol;
3169
3170 raidPtr->root_partition = new_value;
3171 for(column=0; column<raidPtr->numCol; column++) {
3172 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3173 dev = raidPtr->Disks[column].dev;
3174 vp = raidPtr->raid_cinfo[column].ci_vp;
3175 raidread_component_label(dev, vp, &clabel);
3176 clabel.root_partition = new_value;
3177 raidwrite_component_label(dev, vp, &clabel);
3178 }
3179 }
3180 for(column = 0; column < raidPtr->numSpare ; column++) {
3181 sparecol = raidPtr->numCol + column;
3182 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3183 dev = raidPtr->Disks[sparecol].dev;
3184 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3185 raidread_component_label(dev, vp, &clabel);
3186 clabel.root_partition = new_value;
3187 raidwrite_component_label(dev, vp, &clabel);
3188 }
3189 }
3190 return(new_value);
3191 }
3192
3193 void
3194 rf_release_all_vps(cset)
3195 RF_ConfigSet_t *cset;
3196 {
3197 RF_AutoConfig_t *ac;
3198
3199 ac = cset->ac;
3200 while(ac!=NULL) {
3201 /* Close the vp, and give it back */
3202 if (ac->vp) {
3203 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3204 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3205 vput(ac->vp);
3206 ac->vp = NULL;
3207 }
3208 ac = ac->next;
3209 }
3210 }
3211
3212
3213 void
3214 rf_cleanup_config_set(cset)
3215 RF_ConfigSet_t *cset;
3216 {
3217 RF_AutoConfig_t *ac;
3218 RF_AutoConfig_t *next_ac;
3219
3220 ac = cset->ac;
3221 while(ac!=NULL) {
3222 next_ac = ac->next;
3223 /* nuke the label */
3224 free(ac->clabel, M_RAIDFRAME);
3225 /* cleanup the config structure */
3226 free(ac, M_RAIDFRAME);
3227 /* "next.." */
3228 ac = next_ac;
3229 }
3230 /* and, finally, nuke the config set */
3231 free(cset, M_RAIDFRAME);
3232 }
3233
3234
/*
 * Initialize *clabel from the current state of the raid set: version,
 * geometry, layout parameters, and configuration flags.  The label is
 * created dirty; callers mark it clean explicitly when appropriate.
 * Per-component fields (row, column) are NOT set here.
 */
void
raid_init_component_label(raidPtr, clabel)
	RF_Raid_t *raidPtr;
	RF_ComponentLabel_t *clabel;
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3264
3265 int
3266 rf_auto_config_set(cset,unit)
3267 RF_ConfigSet_t *cset;
3268 int *unit;
3269 {
3270 RF_Raid_t *raidPtr;
3271 RF_Config_t *config;
3272 int raidID;
3273 int retcode;
3274
3275 #if DEBUG
3276 printf("RAID autoconfigure\n");
3277 #endif
3278
3279 retcode = 0;
3280 *unit = -1;
3281
3282 /* 1. Create a config structure */
3283
3284 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3285 M_RAIDFRAME,
3286 M_NOWAIT);
3287 if (config==NULL) {
3288 printf("Out of mem!?!?\n");
3289 /* XXX do something more intelligent here. */
3290 return(1);
3291 }
3292
3293 memset(config, 0, sizeof(RF_Config_t));
3294
3295 /*
3296 2. Figure out what RAID ID this one is supposed to live at
3297 See if we can get the same RAID dev that it was configured
3298 on last time..
3299 */
3300
3301 raidID = cset->ac->clabel->last_unit;
3302 if ((raidID < 0) || (raidID >= numraid)) {
3303 /* let's not wander off into lala land. */
3304 raidID = numraid - 1;
3305 }
3306 if (raidPtrs[raidID]->valid != 0) {
3307
3308 /*
3309 Nope... Go looking for an alternative...
3310 Start high so we don't immediately use raid0 if that's
3311 not taken.
3312 */
3313
3314 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3315 if (raidPtrs[raidID]->valid == 0) {
3316 /* can use this one! */
3317 break;
3318 }
3319 }
3320 }
3321
3322 if (raidID < 0) {
3323 /* punt... */
3324 printf("Unable to auto configure this set!\n");
3325 printf("(Out of RAID devs!)\n");
3326 return(1);
3327 }
3328
3329 #if DEBUG
3330 printf("Configuring raid%d:\n",raidID);
3331 #endif
3332
3333 raidPtr = raidPtrs[raidID];
3334
3335 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3336 raidPtr->raidid = raidID;
3337 raidPtr->openings = RAIDOUTSTANDING;
3338
3339 /* 3. Build the configuration structure */
3340 rf_create_configuration(cset->ac, config, raidPtr);
3341
3342 /* 4. Do the configuration */
3343 retcode = rf_Configure(raidPtr, config, cset->ac);
3344
3345 if (retcode == 0) {
3346
3347 raidinit(raidPtrs[raidID]);
3348
3349 rf_markalldirty(raidPtrs[raidID]);
3350 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3351 if (cset->ac->clabel->root_partition==1) {
3352 /* everything configured just fine. Make a note
3353 that this set is eligible to be root. */
3354 cset->rootable = 1;
3355 /* XXX do this here? */
3356 raidPtrs[raidID]->root_partition = 1;
3357 }
3358 }
3359
3360 /* 5. Cleanup */
3361 free(config, M_RAIDFRAME);
3362
3363 *unit = raidID;
3364 return(retcode);
3365 }
3366
3367 void
3368 rf_disk_unbusy(desc)
3369 RF_RaidAccessDesc_t *desc;
3370 {
3371 struct buf *bp;
3372
3373 bp = (struct buf *)desc->bp;
3374 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3375 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3376 }
3377